about summary refs log tree commit diff
diff options
context:
space:
mode:
author Zach DeCook <zachdecook@librem.one> 2020-12-03 23:15:08 -0500
committer Zach DeCook <zachdecook@librem.one> 2020-12-03 23:17:50 -0500
commit 1e5cb63591fbb2745e34a15e076d111e87b6432b (patch)
tree 3121da4ef9cd9b66710cc1eb9234a863f0bbd53e
parent 514e82b5c4c2dbced26d6e230bdc11a3aff523d0 (diff)
download thml2gmi-1e5cb63591fbb2745e34a15e076d111e87b6432b.tar.gz
Improve parsing of lists, inline notes
-rwxr-xr-x thml2gmi.py 48
1 files changed, 35 insertions, 13 deletions
diff --git a/thml2gmi.py b/thml2gmi.py
index 38d542f..02ab4b6 100755
--- a/thml2gmi.py
+++ b/thml2gmi.py
@@ -9,6 +9,9 @@ def main(argv):
     for thing in list(body):
         parseSomething(thing)
 
+def nlprint(string):
+    """Print, starting with a newline."""
+    print("\n" + string, end='')
 def iprint(string):
     """Inline-printing doesn't end with newline"""
     print(string, end='')
@@ -16,15 +19,14 @@ def iprint(string):
 def parseSomething(thing):
     if(len(thing.tag) == 4 and thing.tag[:3] == 'div'):
         parseDiv(thing)
-    elif (thing.tag in ['div', 'p']):
+    elif (thing.tag in ['p']):
         parseP(thing)
+    elif thing.tag in ['div']:
+        parseBlock(thing)
     elif (thing.tag == 'argument'):
-        iprint("> "); parseInline(thing); print("")
-    elif (thing.tag in ['a', 'cite', 'name', 'scripRef', 'span']):
-        parseInline(thing)
-    elif (thing.tag == 'i'):
-        # Convention: Wrapping italics in asterisks
-        iprint('*'); parseInline(thing); iprint('*')
+        nlprint("> "); parseInline(thing);
+    elif (thing.tag in ['a', 'cite', 'i', 'name', 'note', 'scripRef', 'span']):
+        parseIl(thing)
     elif (thing.tag == 'ul'):
         parseList(thing)
     else:
@@ -34,32 +36,52 @@ def parseSomething(thing):
 def parseDiv(div):
     indentLevel = int(div.tag[3:])
     title = div.get('title')
-    print('#' * indentLevel + ' ' + title)
+    nlprint('#' * indentLevel + ' ' + title)
     for child in list(div):
         parseSomething(child)
 
+def parseBlock(block):
+    # Block level elements should start with newlines
+    print("")
+    if(block.text):
+        iprint(block.text.replace("\n"," "))
+    for thing in block:
+        # and may contain other block level elements
+        parseSomething(thing)
+        if(thing.tail):
+            iprint(thing.tail.replace("\n"," "))
+
 def parseP(p):
+    # Each paragraph should start with a newline
+    print("")
     i = p.find('i')
     if (not p.text) and i is not None and len(p) == 1 and (not i.tail):
         iprint("> ")
         p = i
     parseInline(p)
-    # Each paragraph should end with a newline
-    print("")
+
+def parseIl(inline):
+    """Parse an inline item, being tag-aware"""
+    if(inline.tag == 'i'):
+        # Convention: Wrapping italics in asterisks
+        iprint("*"); parseInline(inline); iprint("*")
+    elif(inline.tag == 'note'):
+        iprint("["); parseInline(inline); iprint("]");
+    else:
+        parseInline(inline);
 
 def parseInline(inline):
     if(inline.text):
         iprint(inline.text.replace("\n"," "))
     for thing in inline:
-        parseSomething(thing)
+        parseIl(thing)
         if(thing.tail):
             iprint(thing.tail.replace("\n"," "))
 
 def parseList(ul):
     for child in ul:
-        iprint("* ")
+        nlprint("* ")
         parseInline(child)
-        print("")
 
 if __name__ == '__main__':
     main(sys.argv)