#!/usr/bin/python3 -B
# coding=utf-8
###
### ISO/FDIS 1951 lxmldump
###
### Provenance (from the HG changeset patch this file was imported with):
###   User Matti Hamalainen, Date 1619693499 -10800,
###   Node ID bddf1c283e513887d5297810d36172e5f6ffabac, "Initial import."
###
import sys
import signal
import re
from pathlib import Path
import xml.etree.ElementTree as xmlET

assert sys.version_info >= (3, 7)


###
### Default settings
###
pkk_cfg = {
    "verbosity": 1,
    "dump": False,
}


###
### Misc. helper functions, etc
###
def pkk_cleanup():
    """Cleanup hook called before the program exits.

    There is currently nothing to release, so this only returns 0.
    """
    return 0


## Wrapper for print()
def pkk_print(level, smsg):
    """Print `smsg` when the configured verbosity is at least `level`."""
    if pkk_cfg["verbosity"] >= level:
        print(smsg)


## Fatal error handler
def pkk_fatal(smsg):
    """Print `smsg` prefixed with "ERROR: " and exit with status 1."""
    print("ERROR: " + smsg)
    sys.exit(1)


## Handler for SIGINT signals
def pkk_signal_handler(signal, frame):
    """SIGINT handler: run cleanup, print a notice and exit with status 1.

    The `signal` parameter shadows the `signal` module inside this function
    only; the module is not used here.
    """
    pkk_cleanup()
    print("\nQuitting due to SIGINT / Ctrl+C!")
    sys.exit(1)


## Command line argument parser
def pkk_parse_args(argv):
    """Parse the command line and update `pkk_cfg` accordingly.

    `argv` is the full argument vector, including the program name at
    index 0. Any argument starting with "-" is an option; all leading
    dashes are stripped before matching, so "-d" and "--dump" are
    equivalent. Everything else is collected as an input filename.

    Returns a tuple `(show_help, filenames)`. Exits through pkk_fatal()
    on an unknown option, a missing option parameter or a non-integer
    verbosity value.
    """
    show_help = False
    filenames = []
    argc = 1
    while argc < len(argv):
        arg = argv[argc]
        param = argv[argc + 1] if argc + 1 < len(argv) else None
        needs_param = False

        if arg.startswith("-"):
            # Option type argument
            oarg = arg
            arg = arg.lstrip("-")

            if arg in ("help", "h"):
                show_help = True
            elif arg in ("dump", "d"):
                pkk_cfg["dump"] = True
            elif arg in ("v", "verbosity"):
                needs_param = True
                # Validate before storing, so that the configuration is
                # never left holding None or a non-numeric string.
                if param is None:
                    pkk_fatal("Option '{0}' requires an argument.".format(oarg))
                try:
                    # Stored as int: pkk_print() compares it to int levels.
                    pkk_cfg["verbosity"] = int(param)
                except ValueError:
                    pkk_fatal("Invalid verbosity value '{0}' for option '{1}'.".
                        format(param, oarg))
            else:
                pkk_fatal("Invalid option argument '{0}'.".format(oarg))
        else:
            # Non-option argument
            filenames.append(arg)

        # Skip over the parameter as well, if the option consumed one
        argc += 2 if needs_param else 1

    return show_help, filenames


## Usage / help text
def pkk_print_help(progname):
    """Print the usage text; only the file name part of `progname` is shown."""
    print("lxmldump - Dump ISO/FDIS 1951 XML file data")
    print("Usage: {0} [options] <input xml file(s)>".
        format(Path(progname).name))
    print("")
    print(" --help Show this help")
#    print(" -v, --verbosity <0-3> Set verbosity")
    print(" -d, --dump Dump mode")
    print("")


###
### Main
###
def pkk_recursive_dump(lnode, indent):
    """Print `lnode` and all of its descendants, one element per line.

    Each line consists of `indent` spaces, the tag, the attribute dict and,
    when the element has non-blank text, that text stripped and quoted.
    Children are printed with `indent + 1`.
    """
    pok = ""
    if lnode.text is not None:
        tmp = str(lnode.text).strip()
        if tmp != "":
            pok = " \"" + tmp + "\""

    print("{}{} {}{}".format(" " * indent, lnode.tag, lnode.attrib, pok))
    for qnode in lnode.findall("./*"):
        pkk_recursive_dump(qnode, indent + 1)


def pkk_node_text(node):
    """Return the stripped text of `node`, or "" when it has no text.

    Element.text is None for elements without text; passing that through
    str() would put the literal string "None" into the output.
    """
    return (node.text or "").strip()


def pkk_format_entry(dnode):
    """Format one DictionaryEntry element as "headwords : definitions".

    Headwords are the SearchForm children of each HeadwordCtn, joined with
    ", ". Definitions are the Definition children of each HeadwordCtn
    followed by those of each SenseGrp, joined with " ; ".
    """
    wlist = []
    dlist = []
    for wnode in dnode.findall("./HeadwordCtn"):
        wlist.extend(pkk_node_text(q) for q in wnode.findall("./SearchForm"))
        dlist.extend(pkk_node_text(q) for q in wnode.findall("./Definition"))

    for wnode in dnode.findall("./SenseGrp"):
        dlist.extend(pkk_node_text(q) for q in wnode.findall("./Definition"))

    return "{} : {}".format(", ".join(wlist), " ; ".join(dlist))


def pkk_main(argv):
    """Program entry point; returns the process exit status.

    Installs the SIGINT handler, parses `argv`, shows the help text when
    requested or when no input files were given, and otherwise parses and
    prints every DictionaryEntry of each input file in order.
    """
    signal.signal(signal.SIGINT, pkk_signal_handler)

    ### Check if we have arguments
    show_help, filenames = pkk_parse_args(argv)

    ### Show help if requested
    if show_help or not filenames:
        pkk_print_help(argv[0] if argv else "lxmldump.py")
        return 0

    for filename in filenames:
        # Parse XML file into element tree
        try:
            uxml = xmlET.parse(filename)
        except Exception as e:
            # Top-level boundary: report any parse/IO failure and exit
            pkk_fatal("SVG/XML parsing failed: {0}".format(str(e)))

        # Dump output
        try:
            xroot = uxml.getroot()
            for dnode in xroot.findall("./DictionaryEntry"):
                if pkk_cfg["dump"]:
                    pkk_recursive_dump(dnode, 0)
                    print("\n\n")
                else:
                    print(pkk_format_entry(dnode))

        except (BrokenPipeError, IOError):
            # Output failed, e.g. the reader of a pipe went away.
            # NOTE(review): stderr is presumably closed to silence the
            # interpreter's complaint when flushing stdout at exit - confirm.
            sys.stderr.close()
            sys.exit(1)

    pkk_cleanup()
    return 0


if __name__ == "__main__":
    sys.exit(pkk_main(sys.argv))