1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
#!/usr/bin/env python3
"""Convert usfm line-by-line into gemtext"""
__author__ = "Zach DeCook"
__email__ = "zachdecook@librem.one"
__copyright__ = "Copyright (C) 2021 Zach DeCook"
__license__ = "AGPL"
__version__ = "3"
import fileinput
def printf(string):
    """Print *string* to standard output without appending a newline."""
    print(string, end="")
def smallcaps(word):
    """Return *word* with ASCII lowercase letters replaced by small capitals.

    Only the characters ``a`` through ``z`` are substituted; every other
    character (uppercase letters, digits, punctuation, ...) is copied
    through unchanged.  The entries for ``s`` and ``x`` in the table are the
    plain letters themselves.
    """
    lowercase = 'abcdefghijklmnopqrstuvwxyz'
    small_capitals = 'ᴀʙᴄᴅᴇғɢʜɪᴊᴋʟᴍɴᴏᴘǫʀsᴛᴜᴠᴡxʏᴢ'
    # One positional mapping: lowercase[i] -> small_capitals[i].
    return word.translate(str.maketrans(lowercase, small_capitals))
def superscript(word):
    """Return *word* with the characters ``0``..``W`` replaced by superscripts.

    The table covers the contiguous code-point range from ``0`` to ``W``:
    digits and most uppercase letters get a superscript glyph, while the
    punctuation in between and the letters ``C``, ``F``, ``Q`` and ``S`` map
    to themselves.  Characters outside that range are copied unchanged.
    """
    # TODO: also superscript lowercase letters
    plain = '0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVW'
    raised = '⁰¹²³⁴⁵⁶⁷⁸⁹:;<=>?@ᴬᴮCᴰᴱFᴳᴴᶦᴶᴷᴸᴹᴺᴼᴾQᴿSᵀᵁⱽᵂ'
    # One positional mapping: plain[i] -> raised[i].
    return word.translate(str.maketrans(plain, raised))
# Line-level markers that turn the rest of the line into a gemtext heading.
_HEADING_PREFIXES = {
    '\\mt1': '\n# ', '\\mt': '\n# ', '\\ms': '\n# ', '\\h': '\n# ',
    '\\mt2': '\n## ', '\\s': '\n## ', '\\s1': '\n## ',
    '\\mt3': '\n### ', '\\d': '\n### ', '\\sp': '\n### ',
}

# Markers that are dropped together with the single token following them
# (file id/encoding, verse and chapter numbers, footnote/xref origin refs).
_SKIP_NEXT_MARKERS = frozenset([
    '\\id', '\\ide', '\\v', '\\c', '\\fr', '\\xo',
])

# Markers that produce no output at all.
_IGNORED_MARKERS = frozenset([
    '\\qs*', '\\wj', '\\wj*', '\\xt',
    '\\ft',  # TODO: fancy formatting of more types
    '\\w', '\\w*', '\\+w', '\\+w*',  # words which appear in the glossary
    '\\nb',
])

# Markers that simply append a fixed piece of text.
_LITERAL_MARKERS = {
    '\\p': '\n', '\\m': '\n',
    '\\pi': '\n\t', '\\pi1': '\n\t', '\\mi': '\n\t',
    '\\li1': '\n* ',
    '\\q': '\n> ', '\\q1': '\n> ',
    '\\q2': '\n>\t', '\\q22': '\n>\t',  # \q22 is bad input
    '\\q3': '\n>\t\t',
    '\\qs': '\t',
    '\\r': '\n> ',
    '\\em': '*', '\\it': '*',
    '\\f*': ']',
    '\\x*': ')',
}

# Note openers: append a bracket, then drop the caller token that follows.
# Footnotes: https://ubsicap.github.io/usfm/notes_basic/fnotes.html
# Cross-references: https://ubsicap.github.io/usfm/notes_basic/xrefs.html
# TODO: support Endnotes (\fe and \fe*)
_NOTE_OPENERS = {'\\f': '[', '\\x': '('}

# Stand-alone punctuation tokens that must hug the preceding word.
_CLOSING_PUNCTUATION = frozenset([
    ',', '.', ';', '”', ',”', '.”', '?”', ')', ':', '!', '?', '.’', '.’”',
    '?’”', '?’', ';”', '!”', ');', '),', '’s', '.)',
])

# Stand-alone punctuation tokens that must hug the following word.
_OPENING_PUNCTUATION = frozenset(['“', '(', '‘'])


def convert(line, printStrongs=False):
    """Convert one line of USFM text into a gemtext fragment.

    The line is split into whitespace-separated tokens (after inserting
    spaces around backslash markers) and each token is either translated
    into gemtext markup, dropped, or copied to the output followed by a
    space.

    Args:
        line: A single line of USFM source.
        printStrongs: When true, the Strong's number attached to a word
            (``word|strong="H1234"``) is appended to it in superscript.

    Returns:
        The gemtext string for this line.  It is empty for blank lines and
        ``\\rem`` lines, and is not newline-terminated; line breaks are
        emitted *before* paragraph-like markers instead.
    """
    # TODO: preserve the lack of whitespace before a backslash.
    spaced = line.replace('\\', ' \\')
    # Closing markers may be glued to the following text; detach them.
    for closer in ('\\nd*', '\\+nd*', '\\f*', '\\wj*'):
        spaced = spaced.replace(closer, closer + ' ')
    spaced = spaced.replace('\\w*', ' \\w* ').replace('\\+w*', '\\+w* ')
    tokens = spaced.split()

    if not tokens:
        return ''
    first = tokens[0]
    if first in _HEADING_PREFIXES:
        # TODO: parse as word for title tags in title line
        # Forward printStrongs so headings honour the caller's choice.
        rest = ' '.join(tokens[1:])
        return _HEADING_PREFIXES[first] + convert(rest, printStrongs)
    if first == '\\b':
        return '\n'
    if first == '\\rem':
        return ''

    out = ''
    nd = False      # inside \nd ... \nd*: render words in small caps
    superS = False  # inside \+sup ... \+sup*: render words in superscript
    skip = 0        # number of upcoming tokens to drop
    for word in tokens:
        if skip > 0:
            skip -= 1
        elif word in _SKIP_NEXT_MARKERS:
            skip = 1
        elif word in _IGNORED_MARKERS:
            continue
        elif word in _LITERAL_MARKERS:
            out += _LITERAL_MARKERS[word]
        elif word in _NOTE_OPENERS:
            out += _NOTE_OPENERS[word]
            skip = 1  # the next token is the footnote/xref caller
        elif word in ('\\em*', '\\it*'):
            # Close the emphasis right after the last word, not after a space.
            out = out.rstrip() + '*'
        elif word in ('\\nd', '\\+nd'):
            nd = True
        elif word in ('\\nd*', '\\+nd*'):
            nd = False
        elif word == '\\+sup':
            superS = True
        elif word == '\\+sup*':
            superS = False
        elif '|strong="' in word:
            spl = word.split('|')
            out += spl[0]
            if printStrongs:
                # Strip the leading 'strong="' and the trailing quote.
                out += superscript(spl[1][8:-1])
            out += ' '
        elif 'x-morph="' in word:
            continue
        elif word in _CLOSING_PUNCTUATION:
            # Remove those extra spaces that sneak in.  endswith() instead of
            # out[-1] so a line that starts with punctuation does not raise
            # IndexError on the still-empty output.
            if out.endswith(' '):
                out = out[:-1] + word + ' '
            else:
                out += word + ' '
        elif word in _OPENING_PUNCTUATION:
            out += word
        elif nd:
            # Exactly one rendering per word: small caps, superscript or
            # plain.  Never emit the same word twice.
            out += smallcaps(word) + ' '
        elif superS:
            out += superscript(word) + ' '
        else:
            out += word + ' '
    return out
def main():
    """Convert USFM input to gemtext on standard output, one line at a time.

    Input lines come from ``fileinput.input()``, i.e. the files named on the
    command line, or standard input when none are given:

        ./usfm2gmi <in.usfm >out.md
    """
    for usfm_line in fileinput.input():
        printf(convert(usfm_line))
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|