Theological Markup Language to gemtext converter
Improve parsing of lists, inline notes
| -rwxr-xr-x | thml2gmi.py | 48 |
1 files changed, 35 insertions, 13 deletions
diff --git a/thml2gmi.py b/thml2gmi.py
index 38d542f..02ab4b6 100755
--- a/thml2gmi.py
+++ b/thml2gmi.py
@@ -9,6 +9,9 @@ def main(argv):
     for thing in list(body):
         parseSomething(thing)
 
+def nlprint(string):
+    """Print, starting with a newline."""
+    print("\n" + string, end='')
 def iprint(string):
     """Inline-printing doesn't end with newline"""
     print(string, end='')
@@ -16,15 +19,14 @@ def iprint(string):
 def parseSomething(thing):
     if(len(thing.tag) == 4 and thing.tag[:3] == 'div'):
         parseDiv(thing)
-    elif (thing.tag in ['div', 'p']):
+    elif (thing.tag in ['p']):
         parseP(thing)
+    elif thing.tag in ['div']:
+        parseBlock(thing)
     elif (thing.tag == 'argument'):
-        iprint("> "); parseInline(thing); print("")
-    elif (thing.tag in ['a', 'cite', 'name', 'scripRef', 'span']):
-        parseInline(thing)
-    elif (thing.tag == 'i'):
-        # Convention: Wrapping italics in asterisks
-        iprint('*'); parseInline(thing); iprint('*')
+        nlprint("> "); parseInline(thing);
+    elif (thing.tag in ['a', 'cite', 'i', 'name', 'note', 'scripRef', 'span']):
+        parseIl(thing)
     elif (thing.tag == 'ul'):
         parseList(thing)
     else:
@@ -34,32 +36,52 @@ def parseSomething(thing):
 def parseDiv(div):
     indentLevel = int(div.tag[3:])
     title = div.get('title')
-    print('#' * indentLevel + ' ' + title)
+    nlprint('#' * indentLevel + ' ' + title)
     for child in list(div):
         parseSomething(child)
 
+def parseBlock(block):
+    # Block level elements should start with newlines
+    print("")
+    if(block.text):
+        iprint(block.text.replace("\n"," "))
+    for thing in block:
+        # and may contain other block level elements
+        parseSomething(thing)
+        if(thing.tail):
+            iprint(thing.tail.replace("\n"," "))
+
 def parseP(p):
+    # Each paragraph should start with a newline
+    print("")
     i = p.find('i')
     if (not p.text) and i is not None and len(p) == 1 and (not i.tail):
         iprint("> ")
         p = i
     parseInline(p)
-    # Each paragraph should end with a newline
-    print("")
+
+def parseIl(inline):
+    """Parse an inline item, being tag-aware"""
+    if(inline.tag == 'i'):
+        # Convention: Wrapping italics in asterisks
+        iprint("*"); parseInline(inline); iprint("*")
+    elif(inline.tag == 'note'):
+        iprint("["); parseInline(inline); iprint("]");
+    else:
+        parseInline(inline);
 
 def parseInline(inline):
     if(inline.text):
         iprint(inline.text.replace("\n"," "))
     for thing in inline:
-        parseSomething(thing)
+        parseIl(thing)
         if(thing.tail):
             iprint(thing.tail.replace("\n"," "))
 
 def parseList(ul):
     for child in ul:
-        iprint("* ")
+        nlprint("* ")
         parseInline(child)
-        print("")
 
 if __name__ == '__main__':
     main(sys.argv)