usfm2gmi - Convert USFM Bibles into gemtext (Python library/utility)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72

#!/usr/bin/env python3
import fileinput

def printf(string):
  """Write *string* to standard output without appending a newline."""
  # The caller controls every line break, so the default '\n' terminator
  # of print() is replaced by an empty one.
  print(string, end='')

def smallcaps(word):
  """Return *word* with ASCII lowercase letters replaced by small capitals.

  Only the 26 letters 'a'..'z' are mapped; every other character
  (uppercase letters, digits, punctuation, non-ASCII text) is copied
  through unchanged.  's' and 'x' map to themselves in the glyph table.
  """
  lowercase = 'abcdefghijklmnopqrstuvwxyz'
  glyphs = 'ᴀʙᴄᴅᴇғɢʜɪᴊᴋʟᴍɴᴏᴘǫʀsᴛᴜᴠᴡxʏᴢ'
  # Position i of `glyphs` is the small-capital form of position i of
  # `lowercase`, so a translation table does the whole mapping in one pass.
  return word.translate(str.maketrans(lowercase, glyphs))

def convert(line):
  """Convert one line of USFM markup into a gemtext fragment (a string).

  The line is split into whitespace-separated tokens, with every
  backslash marker forced to be its own token.  Handling then depends on
  the first token:

  * empty line               -> ''
  * \\mt1, \\mt, \\ms        -> '\\n# '   + rest of the line
  * \\mt2, \\s               -> '\\n## '  + rest of the line
  * \\mt3                    -> '\\n### ' + rest of the line
  * \\b                      -> '\\n'
  * \\rem                    -> '' (remark lines are dropped)

  Any other line is processed token by token: \\v and \\c are dropped
  together with the token that follows them, \\p and \\m emit a newline,
  \\pi and \\pi1 emit a newline plus a tab, \\q emits '\\n> ', \\wj and
  \\wj* are dropped, and words between \\nd and \\nd* are passed through
  smallcaps().  Every emitted word is followed by a single space; no
  trailing newline is added.
  """
  # TODO: preserve the lack of whitespace before a backslash.
  # A space is inserted before every backslash, and after each closing
  # '\nd*', so that markers glued to neighbouring text become separate
  # tokens once the line is split on whitespace.
  tokens = line.replace('\\', ' \\').replace('\\nd*', '\\nd* ').split()
  if not tokens:
    return ''

  # Line-level markers: the first token alone decides the whole output.
  # TODO: parse as word for title tags in title line
  heading_prefix = {
    '\\mt1': '\n# ',
    '\\mt': '\n# ',
    '\\ms': '\n# ',
    '\\mt2': '\n## ',
    '\\s': '\n## ',
    '\\mt3': '\n### ',
  }
  lead = tokens[0]
  if lead in heading_prefix:
    return heading_prefix[lead] + ' '.join(tokens[1:])
  if lead == '\\b':
    return '\n'
  if lead == '\\rem':
    return ''

  # Inline markers that translate directly into a fixed piece of output.
  line_breaks = {
    '\\p': '\n',
    '\\m': '\n',
    '\\pi': '\n\t',
    '\\pi1': '\n\t',
    '\\q': '\n> ',
  }
  pieces = []
  in_nd = False  # True while between '\nd' and '\nd*'
  stream = iter(tokens)
  for token in stream:
    if token in ('\\v', '\\c'):
      # Discard the token right after the marker (the verse/chapter
      # number); harmless if the marker was the last token.
      next(stream, None)
    elif token in line_breaks:
      pieces.append(line_breaks[token])
    elif token in ('\\wj', '\\wj*'):
      pass
    elif token == '\\nd':
      in_nd = True
    elif token == '\\nd*':
      in_nd = False
    else:
      text = smallcaps(token) if in_nd else token
      pieces.append(text + ' ')
  return ''.join(pieces)

def main():
  """Translate USFM input to gemtext on standard output.

  Lines are read through fileinput.input(), each one is passed to
  convert(), and the result is written without an added newline:
     ./usfm2gmi <in.usfm >out.gmi
  """
  # map() is lazy, so each line is still converted and printed before the
  # next one is read.
  for fragment in map(convert, fileinput.input()):
    printf(fragment)

# Run the conversion only when this file is executed as a script, so that
# importing it as a library has no side effects.
if __name__ == '__main__':
  main()