From 028e867041aa919e8ba94c4a1859b33933d13c3c Mon Sep 17 00:00:00 2001
From: Zach DeCook
Date: Wed, 2 Dec 2020 21:06:17 -0500
Subject: Parse p as a line of text

---
 thml2gmi.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/thml2gmi.py b/thml2gmi.py
index 24bc18a..9137ab5 100755
--- a/thml2gmi.py
+++ b/thml2gmi.py
@@ -12,6 +12,8 @@ def main(argv):
 def parseSomething(thing):
     if(len(thing.tag) == 4 and thing.tag[:3] == 'div'):
         parseDiv(thing)
+    elif (thing.tag == 'p'):
+        parseP(thing)
 
 def parseDiv(div):
     indentLevel = int(div.tag[3:])
@@ -20,5 +22,9 @@ def parseDiv(div):
     for child in list(div):
         parseSomething(child)
 
+def parseP(p):
+    text = "".join([x for x in p.itertext()])
+    print(text.replace("\n", " "))
+
 if __name__ == '__main__':
     main(sys.argv)
-- 
cgit 1.4.1