1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
#!/usr/bin/env python3
import fileinput
def printf(string):
    """Write *string* to standard output without a trailing newline."""
    print(string, end='')
def smallcaps(word):
    """Return *word* with ASCII lowercase letters shown as small capitals.

    Only the characters 'a' through 'z' are substituted; uppercase letters,
    digits, punctuation and non-ASCII characters pass through unchanged.
    In the glyph table below, 's' and 'x' map to themselves.
    """
    glyphs = 'ᴀʙᴄᴅᴇғɢʜɪᴊᴋʟᴍɴᴏᴘǫʀsᴛᴜᴠᴡxʏᴢ'
    # Pair each plain letter with the glyph at the same alphabetical position.
    lookup = dict(zip('abcdefghijklmnopqrstuvwxyz', glyphs))
    return ''.join(lookup.get(ch, ch) for ch in word)
def convert(line):
    """Translate one line of USFM markup into a gemtext fragment.

    The line is tokenised on whitespace after forcing every backslash marker
    (and the closing nd/f/wj markers) to stand alone as its own token.

    * A blank line or a rem (remark) line yields ''.
    * A line starting with a title/heading marker yields a '#', '##' or
      '###' heading built from the rest of the line.
    * A line starting with the b marker yields a single newline.
    * Any other line is walked token by token: paragraph and poetry markers
      emit line breaks/indentation, verse and chapter numbers are dropped,
      footnotes are wrapped in square brackets, and words between the nd
      markers are rendered with smallcaps().

    Returns the converted text as a string.
    """
    # Line-level markers: the first token decides the heading prefix.
    headings = {
        '\\mt1': '\n# ', '\\mt': '\n# ', '\\ms': '\n# ', '\\h': '\n# ',
        '\\mt2': '\n## ', '\\s': '\n## ',
        '\\mt3': '\n### ', '\\d': '\n### ',
    }
    # Inline markers: marker -> (text to emit, following tokens to drop).
    actions = {
        '\\id': ('', 1), '\\ide': ('', 1),
        '\\v': ('', 1), '\\c': ('', 1),
        '\\p': ('\n', 0), '\\m': ('\n', 0),
        '\\pi': ('\n\t', 0), '\\pi1': ('\n\t', 0),
        '\\q': ('\n> ', 0), '\\q1': ('\n> ', 0),
        '\\q2': ('\n>\t', 0),
        '\\q3': ('\n>\t\t', 0),
        '\\qs': ('\t', 0),
        '\\qs*': ('', 0),
        '\\wj': ('', 0), '\\wj*': ('', 0),
        # Footnotes (https://ubsicap.github.io/usfm/notes_basic/fnotes.html)
        '\\f': ('[', 1),    # the token after \f is the footnote caller
        '\\fr': ('', 1),    # verse reference not necessary for inline fn
        '\\f*': (']', 0),
        # TODO: support Endnotes (\fe and \fe*)
        '\\ft': ('', 0),    # TODO: fancy formatting of more types
    }

    # TODO: preserve the lack of whitespace before a backslash.
    spaced = line.replace('\\', ' \\')
    for closer in ('\\nd*', '\\f*', '\\wj*'):
        spaced = spaced.replace(closer, closer + ' ')
    tokens = spaced.split()

    if not tokens:
        return ''
    lead = tokens[0]
    if lead in headings:
        # TODO: parse as word for title tags in title line
        return headings[lead] + ' '.join(tokens[1:])
    if lead == '\\b':
        return '\n'
    if lead == '\\rem':
        return ''

    pieces = []
    small = False   # True while inside an \nd ... \nd* span
    pending = 0     # how many upcoming tokens to discard unseen
    for token in tokens:
        if pending > 0:
            pending -= 1
        elif token in actions:
            text, pending = actions[token]
            pieces.append(text)
        elif token == '\\nd':
            small = True
        elif token == '\\nd*':
            small = False
        elif small:
            pieces.append(smallcaps(token) + ' ')
        else:
            pieces.append(token + ' ')
    return ''.join(pieces)
def main():
    """Convert USFM input to gemtext, one line at a time.

    Lines are read through fileinput.input(), each is passed to convert(),
    and the result is written with printf() (no extra newline added).

    Usage:
        ./usfm2gmi <in.usfm >out.md
    """
    for raw in fileinput.input():
        printf(convert(raw))
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|