Mercurial > hg > lxmldump
changeset 6:34a89d61dbe7
Merge and cleanup.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 10 May 2021 21:43:00 +0300 |
parents | 274b2091137c (diff) 7ce08dea935b (current diff) |
children | 4b4299b62f7f |
files | lxmldump.py |
diffstat | 1 files changed, 58 insertions(+), 49 deletions(-) [+] |
line wrap: on
line diff
--- a/lxmldump.py Mon May 10 12:24:10 2021 +0300
+++ b/lxmldump.py Mon May 10 21:43:00 2021 +0300
@@ -1,13 +1,18 @@
 #!/usr/bin/python3 -B
 # coding=utf-8
 ###
-### ISO/FDIS 1951 lxmldump
+### lxmldump - Dump ISO/FDIS 1951 XML file data
+### Programmed and designed by Matti 'ccr' Hämäläinen <ccr@tnsp.org>
+### (C) Copyright 2021 Tecnic Software productions (TNSP)
+###
+### Python 3.7+ required!
 ###
 import sys
 import signal
 import re
 from pathlib import Path
 import xml.etree.ElementTree as xmlET
+import unicodedata
 
 assert sys.version_info >= (3, 7)
 
@@ -16,8 +21,8 @@
 ### Default settings
 ###
 pkk_cfg = {
- "verbosity": 1,
 "dump": False,
+ "normalize": False,
 }
 
 
@@ -29,9 +34,11 @@
 
 
 ## Wrapper for print()
-def pkk_print(level, smsg):
- if pkk_cfg["verbosity"] >= level:
- print(smsg)
+def pkk_print(smsg):
+ if pkk_cfg["normalize"]:
+ sys.stdout.write(unicodedata.normalize("NFC", smsg))
+ else:
+ sys.stdout.write(smsg)
 
 
 ## Fatal error handler
@@ -47,6 +54,46 @@
 sys.exit(1)
 
 
+##
+def pkk_dump_recursive(lnode, indent):
+ if lnode.tag == "Example":
+ stmp = "".join(lnode.itertext()).strip()
+ print("{}{} \"{}\"".format(" " * indent, lnode.tag, stmp))
+ else:
+ stmp = ""
+ if lnode.text != None:
+ tmp = str(lnode.text).strip()
+ if tmp != "":
+ stmp = " \""+ tmp +"\""
+
+ if len(lnode.attrib) > 0:
+ atmp = " "+str(lnode.attrib)
+ else:
+ atmp = ""
+
+ pkk_print("{}{}{}{}\n".format(" " * indent, lnode.tag, atmp, stmp))
+ for qnode in lnode.findall("./*"):
+ pkk_dump_recursive(qnode, indent + 1)
+
+
+##
+def pkk_output_node(dnode):
+ wlist = []
+ dlist = []
+ for wnode in dnode.findall("./HeadwordCtn"):
+ for qnode in wnode.findall("./SearchForm"):
+ wlist.append(str(qnode.text).strip())
+
+ for qnode in wnode.findall("./Definition"):
+ dlist.append(str(qnode.text).strip())
+
+ for wnode in dnode.findall("./SenseGrp"):
+ for qnode in wnode.findall("./Definition"):
+ dlist.append(str(qnode.text).strip())
+
+ pkk_print("{} : {}\n".format(", ".join(wlist), " ; ".join(dlist)))
+
+
 ###
 ### Main program starts
 ###
@@ -75,9 +122,8 @@
 pkk_show_help = True
 elif arg == "dump" or arg == "d":
 pkk_cfg["dump"] = True
- elif arg == "v" or arg == "verbosity":
- needs_param = True
- pkk_cfg["verbosity"] = param
+ elif arg == "normalize" or arg == "n":
+ pkk_cfg["normalize"] = True
 else:
 pkk_fatal(u"Invalid option argument '{0}'.".format(oarg))
 
@@ -100,38 +146,13 @@
 format(str(Path(sys.argv[0]).name)))
 print(u"")
 print(u" --help Show this help")
-# print(u" -v, --verbosity <0-3> Set verbosity")
 print(u" -d, --dump Dump mode")
+ print(u" -n, --normalize Output NFC normalized Unicode")
 print(u"")
 sys.exit(0)
 
 
-
-
-###
-### Main
-###
-def pkk_recursive_dump(lnode, indent):
- if lnode.tag == "Example":
- stmp = "".join(lnode.itertext()).strip()
- print("{}{} \"{}\"".format(" " * indent, lnode.tag, stmp))
- else:
- stmp = ""
- if lnode.text != None:
- tmp = str(lnode.text).strip()
- if tmp != "":
- stmp = " \""+ tmp +"\""
-
- if len(lnode.attrib) > 0:
- atmp = " "+str(lnode.attrib)
- else:
- atmp = ""
-
- print("{}{}{}{}".format(" " * indent, lnode.tag, atmp, stmp))
- for qnode in lnode.findall("./*"):
- pkk_recursive_dump(qnode, indent + 1)
-
-
+### Handle each input file
 for filename in pkk_filenames:
 # Parse XML file into element tree
 try:
@@ -144,22 +165,10 @@
 xroot = uxml.getroot()
 for dnode in xroot.findall("./DictionaryEntry"):
 if pkk_cfg["dump"]:
- pkk_recursive_dump(dnode, 0)
+ pkk_dump_recursive(dnode, 0)
 print("\n\n")
 else:
- wlist = []
- dlist = []
- for wnode in dnode.findall("./HeadwordCtn"):
- for qnode in wnode.findall("./SearchForm"):
- wlist.append(str(qnode.text).strip())
- for qnode in wnode.findall("./Definition"):
- dlist.append(str(qnode.text).strip())
-
- for wnode in dnode.findall("./SenseGrp"):
- for qnode in wnode.findall("./Definition"):
- dlist.append(str(qnode.text).strip())
-
- print("{} : {}".format(", ".join(wlist), " ; ".join(dlist)))
+ pkk_output_node(dnode)
 
 except (BrokenPipeError, IOError) as e:
 sys.stderr.close()