#!/usr/bin/env python
import sys
import xml.etree.ElementTree as ET
def main(argv):
    """Convert the ThML file named by ``argv[1]`` to text on stdout.

    The file is parsed as XML and every direct child of the root's
    ``ThML.body`` element is handed to ``parseSomething``.

    Args:
        argv: Command-line style argument list; ``argv[0]`` is the program
            name and ``argv[1]`` is the path of the XML file to read.

    Raises:
        SystemExit: With an explanatory message if no input path was given
            or the document has no ``ThML.body`` element.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else 'thml'
        sys.exit('usage: {} FILE'.format(prog))

    root = ET.parse(argv[1]).getroot()
    body = root.find('ThML.body')
    if body is None:
        # find() returns None when the element is absent; fail with a clear
        # message instead of a TypeError from iterating None.
        sys.exit('{}: no ThML.body element found'.format(argv[1]))

    for thing in body:
        parseSomething(thing)
def parseSomething(thing):
    """Dispatch an element to the handler that matches its tag.

    ``divN`` (a four-character tag whose last character is a decimal digit)
    goes to ``parseDiv``, ``p`` goes to ``parseP`` and ``span`` goes to
    ``parseInline``. Any other element is treated as a transparent
    container: nothing is printed for the element itself and its children
    are dispatched recursively.

    Args:
        thing: The ElementTree element to process.
    """
    tag = thing.tag
    # Require a numeric level so look-alike tags such as "divs" are not sent
    # to parseDiv, whose int() conversion of the suffix would raise ValueError.
    if len(tag) == 4 and tag.startswith('div') and tag[3].isdecimal():
        parseDiv(thing)
    elif tag == 'p':
        parseP(thing)
    elif tag == 'span':
        parseInline(thing)
    else:
        for child in thing:
            parseSomething(child)
def parseDiv(div):
    """Print a heading for a ``divN`` element, then process its children.

    The heading is ``N`` hash characters, a space, and the element's
    ``title`` attribute, where ``N`` is the integer that follows ``div`` in
    the tag name. Each child is then handed to ``parseSomething``.

    Args:
        div: An element whose tag is ``div`` followed by an integer.

    Raises:
        ValueError: If the part of the tag after ``div`` is not an integer.
    """
    indentLevel = int(div.tag[3:])
    # get() returns None for a missing attribute, which would make the string
    # concatenation below raise TypeError; fall back to an empty title.
    title = div.get('title', '')
    print('#' * indentLevel + ' ' + title)
    for child in div:
        parseSomething(child)
def parseP(p):
    """Print the inline content of paragraph element *p*, then end the line.

    The content is produced by ``parseInline``, which prints without a
    trailing newline.
    """
    parseInline(p)
    # Terminate the paragraph: parseInline leaves the cursor mid-line.
    print()
def parseInline(inline):
    """Print the text content of *inline* on the current output line.

    The element's leading text is printed first. Each child is then handed
    to ``parseSomething`` and followed by the child's tail text. Newlines
    inside text are replaced by spaces and no trailing newline is printed.

    Args:
        inline: The ElementTree element whose content should be printed.
    """
    def emit(fragment):
        # Missing (None) or empty text produces no output at all.
        if fragment:
            print(fragment.replace("\n", " "), end='')

    emit(inline.text)
    for child in inline:
        parseSomething(child)
        emit(child.tail)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv)