about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Zach DeCook <zachdecook@librem.one> 2020-12-02 22:26:27 -0500
committer: Zach DeCook <zachdecook@librem.one> 2020-12-02 22:26:27 -0500
commit: 25778cf9da3d30bc4a23839be0b5e8d4ca42812e (patch)
tree: e6059f2f26fd2a3e86ef7ab88c9045763f319cfe
parent: 028e867041aa919e8ba94c4a1859b33933d13c3c (diff)
download: thml2gmi-25778cf9da3d30bc4a23839be0b5e8d4ca42812e.tar.gz
Improve p parsing, and include inline elements
-rwxr-xr-x thml2gmi.py 18
1 files changed, 16 insertions, 2 deletions
diff --git a/thml2gmi.py b/thml2gmi.py
index 9137ab5..5705ebf 100755
--- a/thml2gmi.py
+++ b/thml2gmi.py
@@ -14,6 +14,11 @@ def parseSomething(thing):
         parseDiv(thing)
     elif (thing.tag == 'p'):
         parseP(thing)
+    elif (thing.tag in ['span']):
+        parseInline(thing)
+    else:
+        for thing in list(thing):
+            parseSomething(thing)
 
 def parseDiv(div):
     indentLevel = int(div.tag[3:])
@@ -23,8 +28,17 @@ def parseDiv(div):
         parseSomething(child)
 
 def parseP(p):
-    text = "".join([x for x in p.itertext()])
-    print(text.replace("\n", " "))
+    parseInline(p)
+    # Each paragraph should end with a newline
+    print("")
+
+def parseInline(inline):
+    if(inline.text):
+        print(inline.text.replace("\n"," "), end='')
+    for thing in inline:
+        parseSomething(thing)
+        if(thing.tail):
+            print(thing.tail.replace("\n"," "), end='')
 
 if __name__ == '__main__':
     main(sys.argv)