diff options
| author | Zach DeCook <zachdecook@librem.one> | 2020-12-02 22:26:27 -0500 |
|---|---|---|
| committer | Zach DeCook <zachdecook@librem.one> | 2020-12-02 22:26:27 -0500 |
| commit | 25778cf9da3d30bc4a23839be0b5e8d4ca42812e (patch) | |
| tree | e6059f2f26fd2a3e86ef7ab88c9045763f319cfe | |
| parent | 028e867041aa919e8ba94c4a1859b33933d13c3c (diff) | |
| download | thml2gmi-25778cf9da3d30bc4a23839be0b5e8d4ca42812e.tar.gz | |
Improve p parsing, and include inline elements
| -rwxr-xr-x | thml2gmi.py | 18 |
1 files changed, 16 insertions, 2 deletions
```diff
diff --git a/thml2gmi.py b/thml2gmi.py
index 9137ab5..5705ebf 100755
--- a/thml2gmi.py
+++ b/thml2gmi.py
@@ -14,6 +14,11 @@ def parseSomething(thing):
         parseDiv(thing)
     elif (thing.tag == 'p'):
         parseP(thing)
+    elif (thing.tag in ['span']):
+        parseInline(thing)
+    else:
+        for thing in list(thing):
+            parseSomething(thing)
 
 def parseDiv(div):
     indentLevel = int(div.tag[3:])
@@ -23,8 +28,17 @@ def parseDiv(div):
         parseSomething(child)
 
 def parseP(p):
-    text = "".join([x for x in p.itertext()])
-    print(text.replace("\n", " "))
+    parseInline(p)
+    # Each paragraph should end with a newline
+    print("")
+
+def parseInline(inline):
+    if(inline.text):
+        print(inline.text.replace("\n"," "), end='')
+    for thing in inline:
+        parseSomething(thing)
+        if(thing.tail):
+            print(thing.tail.replace("\n"," "), end='')
 
 if __name__ == '__main__':
     main(sys.argv)
```
