From 25778cf9da3d30bc4a23839be0b5e8d4ca42812e Mon Sep 17 00:00:00 2001 From: Zach DeCook Date: Wed, 2 Dec 2020 22:26:27 -0500 Subject: Improve p parsing, and include inline elements --- thml2gmi.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/thml2gmi.py b/thml2gmi.py index 9137ab5..5705ebf 100755 --- a/thml2gmi.py +++ b/thml2gmi.py @@ -14,6 +14,11 @@ def parseSomething(thing): parseDiv(thing) elif (thing.tag == 'p'): parseP(thing) + elif (thing.tag in ['span']): + parseInline(thing) + else: + for thing in list(thing): + parseSomething(thing) def parseDiv(div): indentLevel = int(div.tag[3:]) @@ -23,8 +28,17 @@ def parseDiv(div): parseSomething(child) def parseP(p): - text = "".join([x for x in p.itertext()]) - print(text.replace("\n", " ")) + parseInline(p) + # Each paragraph should end with a newline + print("") + +def parseInline(inline): + if(inline.text): + print(inline.text.replace("\n"," "), end='') + for thing in inline: + parseSomething(thing) + if(thing.tail): + print(thing.tail.replace("\n"," "), end='') if __name__ == '__main__': main(sys.argv) -- cgit 1.4.1