#!/usr/bin/env python3
"""Render the body of a ThML XML document as plain text on stdout.

The output uses Markdown-style markers: ``divN`` elements become headings
with N leading ``#`` characters, ``<i>`` content is wrapped in asterisks,
and every child of a ``<ul>`` becomes a ``* `` bullet line.

Usage: pass the path of the XML file as the first command-line argument.
"""

import sys
import xml.etree.ElementTree as ET


def main(argv):
    """Parse the XML file named by ``argv[1]`` and print its ``ThML.body``.

    Args:
        argv: Command-line argument list; ``argv[0]`` is the program name
            and ``argv[1]`` is the path of the XML file to convert.

    Raises:
        SystemExit: If no file argument was given, or if the document root
            has no direct ``ThML.body`` child.
    """
    if len(argv) < 2:
        program = argv[0] if argv else 'prog'
        raise SystemExit('usage: {} FILE.xml'.format(program))

    root = ET.parse(argv[1]).getroot()
    body = root.find('ThML.body')
    if body is None:
        # Element.find() returns None when there is no match; report that
        # instead of failing later with an unrelated TypeError.
        raise SystemExit(
            '{}: no <ThML.body> element found'.format(argv[1]))

    for child in body:
        parseSomething(child)


def iprint(string):
    """Print ``string`` inline, i.e. without a trailing newline."""
    print(string, end='')


def _is_div_tag(tag):
    """Return True for tags of the form ``div<digit>`` (``div1``, ``div2``...)."""
    # The isinstance guard keeps non-string tags out of the string checks.
    return (
        isinstance(tag, str)
        and len(tag) == 4
        and tag.startswith('div')
        and tag[3].isdecimal()
    )


def parseSomething(thing):
    """Dispatch ``thing`` to the handler that matches its tag.

    Elements with an unrecognised tag produce no output of their own; only
    their children are visited.

    Args:
        thing: The ``xml.etree.ElementTree.Element`` to render.
    """
    tag = thing.tag
    if _is_div_tag(tag):
        parseDiv(thing)
    elif tag == 'p':
        parseP(thing)
    elif tag == 'span':
        parseInline(thing)
    elif tag == 'i':
        # Convention: Wrapping italics in asterisks
        iprint('*')
        parseInline(thing)
        iprint('*')
    elif tag == 'ul':
        parseList(thing)
    else:
        # NOTE(review): the text and child tails of an unrecognised element
        # are not printed here -- confirm that dropping them is intended.
        for child in thing:
            parseSomething(child)


def parseDiv(div):
    """Print a heading for a ``divN`` element, then render its children.

    The heading is N ``#`` characters, a space, and the ``title`` attribute.
    No heading line is printed when the element has no ``title`` attribute.

    Args:
        div: An element whose tag is ``div`` followed by a decimal level.
    """
    indentLevel = int(div.tag[3:])
    title = div.get('title')
    if title is not None:
        print('#' * indentLevel + ' ' + title)
    for child in div:
        parseSomething(child)


def parseP(p):
    """Render a paragraph element as one line of inline content."""
    parseInline(p)
    # Each paragraph should end with a newline
    print("")


def parseInline(inline):
    """Print the mixed content of ``inline`` without a trailing newline.

    Newlines inside text are replaced by spaces. The element's own text is
    printed first; each child is then rendered, followed by that child's
    tail text.

    Args:
        inline: The element whose text and children are rendered.
    """
    if inline.text:
        iprint(inline.text.replace("\n", " "))
    for child in inline:
        parseSomething(child)
        # The tail is the text between this child's end tag and the next
        # sibling (or the parent's end tag), so it belongs right after it.
        if child.tail:
            iprint(child.tail.replace("\n", " "))


def parseList(ul):
    """Print every child of ``ul`` as a ``* `` bullet on its own line."""
    for item in ul:
        iprint("* ")
        parseInline(item)
        print("")


if __name__ == '__main__':
    main(sys.argv)