# HG changeset patch # User Matti Hamalainen # Date 1620672180 -10800 # Node ID 34a89d61dbe78e9ff438a4ba1f4174b78fc9b950 # Parent 274b2091137cee08385cee4a90456a32d6f56e03# Parent 7ce08dea935b35cee4b1223364034bdf03985b0b Merge and cleanup. diff -r 274b2091137c -r 34a89d61dbe7 lxmldump.py --- a/lxmldump.py Mon May 10 21:38:11 2021 +0300 +++ b/lxmldump.py Mon May 10 21:43:00 2021 +0300 @@ -54,6 +54,46 @@ sys.exit(1) +## +def pkk_dump_recursive(lnode, indent): + if lnode.tag == "Example": + stmp = "".join(lnode.itertext()).strip() + print("{}{} \"{}\"".format(" " * indent, lnode.tag, stmp)) + else: + stmp = "" + if lnode.text != None: + tmp = str(lnode.text).strip() + if tmp != "": + stmp = " \""+ tmp +"\"" + + if len(lnode.attrib) > 0: + atmp = " "+str(lnode.attrib) + else: + atmp = "" + + pkk_print("{}{}{}{}\n".format(" " * indent, lnode.tag, atmp, stmp)) + for qnode in lnode.findall("./*"): + pkk_dump_recursive(qnode, indent + 1) + + +## +def pkk_output_node(dnode): + wlist = [] + dlist = [] + for wnode in dnode.findall("./HeadwordCtn"): + for qnode in wnode.findall("./SearchForm"): + wlist.append(str(qnode.text).strip()) + + for qnode in wnode.findall("./Definition"): + dlist.append(str(qnode.text).strip()) + + for wnode in dnode.findall("./SenseGrp"): + for qnode in wnode.findall("./Definition"): + dlist.append(str(qnode.text).strip()) + + pkk_print("{} : {}\n".format(", ".join(wlist), " ; ".join(dlist))) + + ### ### Main program starts ### @@ -107,42 +147,12 @@ print(u"") print(u" --help Show this help") print(u" -d, --dump Dump mode") + print(u" -n, --normalize Output NFC normalized Unicode") print(u"") sys.exit(0) -### -### Main -### -def pkk_dump_simple_node(lnode, indent): - stmp = "" - if lnode.text != None: - tmp = str(lnode.text).strip() - if tmp != "": - stmp = " \""+ tmp +"\"" - - pkk_print("{}{} {}{}".format(" " * indent, lnode.tag, lnode.attrib, stmp)) - for qnode in lnode.findall("./*"): - pkk_dump_simple_node(qnode, indent + 1) - - -def pkk_dump_node(dnode): - wlist = [] - dlist = [] - for wnode in dnode.findall("./HeadwordCtn"): - for qnode in wnode.findall("./SearchForm"): - wlist.append(str(qnode.text).strip()) - - for qnode in wnode.findall("./Definition"): - dlist.append(str(qnode.text).strip()) - - for wnode in dnode.findall("./SenseGrp"): - for qnode in wnode.findall("./Definition"): - dlist.append(str(qnode.text).strip()) - - pkk_print("{} : {}".format(", ".join(wlist), " ; ".join(dlist))) - - +### Handle each input file for filename in pkk_filenames: # Parse XML file into element tree try: @@ -155,10 +165,10 @@ xroot = uxml.getroot() for dnode in xroot.findall("./DictionaryEntry"): if pkk_cfg["dump"]: - pkk_dump_simple_node(dnode, 0) + pkk_dump_recursive(dnode, 0) print("\n\n") else: - pkk_dump_node(dnode) + pkk_output_node(dnode) except (BrokenPipeError, IOError) as e: sys.stderr.close()