# HG changeset patch # User Matti Hamalainen # Date 1620671891 -10800 # Node ID 274b2091137cee08385cee4a90456a32d6f56e03 # Parent 60b789dfee32d825f68658ebe65c76610aae33c3 Some more work on cleaning this up. diff -r 60b789dfee32 -r 274b2091137c lxmldump.py --- a/lxmldump.py Tue May 04 13:58:45 2021 +0300 +++ b/lxmldump.py Mon May 10 21:38:11 2021 +0300 @@ -12,6 +12,7 @@ import re from pathlib import Path import xml.etree.ElementTree as xmlET +import unicodedata assert sys.version_info >= (3, 7) @@ -20,8 +21,8 @@ ### Default settings ### pkk_cfg = { - "verbosity": 1, "dump": False, + "normalize": False, } @@ -33,9 +34,11 @@ ## Wrapper for print() -def pkk_print(level, smsg): - if pkk_cfg["verbosity"] >= level: - print(smsg) +def pkk_print(smsg): + if pkk_cfg["normalize"]: + sys.stdout.write(unicodedata.normalize("NFC", smsg)) + else: + sys.stdout.write(smsg) ## Fatal error handler @@ -79,9 +82,8 @@ pkk_show_help = True elif arg == "dump" or arg == "d": pkk_cfg["dump"] = True - elif arg == "v" or arg == "verbosity": - needs_param = True - pkk_cfg["verbosity"] = param + elif arg == "normalize" or arg == "n": + pkk_cfg["normalize"] = True else: pkk_fatal(u"Invalid option argument '{0}'.".format(oarg)) @@ -104,27 +106,41 @@ format(str(Path(sys.argv[0]).name))) print(u"") print(u" --help Show this help") -# print(u" -v, --verbosity <0-3> Set verbosity") print(u" -d, --dump Dump mode") print(u"") sys.exit(0) - - ### ### Main ### -def pkk_recursive_dump(lnode, indent): +def pkk_dump_simple_node(lnode, indent): stmp = "" if lnode.text != None: tmp = str(lnode.text).strip() if tmp != "": stmp = " \""+ tmp +"\"" - print("{}{} {}{}".format(" " * indent, lnode.tag, lnode.attrib, stmp)) + pkk_print("{}{} {}{}".format(" " * indent, lnode.tag, lnode.attrib, stmp)) for qnode in lnode.findall("./*"): - pkk_recursive_dump(qnode, indent + 1) + pkk_dump_simple_node(qnode, indent + 1) + + +def pkk_dump_node(dnode): + wlist = [] + dlist = [] + for wnode in dnode.findall("./HeadwordCtn"): + for qnode in wnode.findall("./SearchForm"): + wlist.append(str(qnode.text).strip()) + + for qnode in wnode.findall("./Definition"): + dlist.append(str(qnode.text).strip()) + + for wnode in dnode.findall("./SenseGrp"): + for qnode in wnode.findall("./Definition"): + dlist.append(str(qnode.text).strip()) + + pkk_print("{} : {}".format(", ".join(wlist), " ; ".join(dlist))) for filename in pkk_filenames: @@ -139,22 +155,10 @@ xroot = uxml.getroot() for dnode in xroot.findall("./DictionaryEntry"): if pkk_cfg["dump"]: - pkk_recursive_dump(dnode, 0) + pkk_dump_simple_node(dnode, 0) print("\n\n") else: - wlist = [] - dlist = [] - for wnode in dnode.findall("./HeadwordCtn"): - for qnode in wnode.findall("./SearchForm"): - wlist.append(str(qnode.text).strip()) - for qnode in wnode.findall("./Definition"): - dlist.append(str(qnode.text).strip()) - - for wnode in dnode.findall("./SenseGrp"): - for qnode in wnode.findall("./Definition"): - dlist.append(str(qnode.text).strip()) - - print("{} : {}".format(", ".join(wlist), " ; ".join(dlist))) + pkk_dump_node(dnode) except (BrokenPipeError, IOError) as e: sys.stderr.close()