changeset 0:bddf1c283e51

Initial import.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 29 Apr 2021 13:51:39 +0300
parents
children 0e5f705a895b
files lxmldump.py
diffstat 1 files changed, 160 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lxmldump.py	Thu Apr 29 13:51:39 2021 +0300
@@ -0,0 +1,160 @@
+#!/usr/bin/python3 -B
+# coding=utf-8
+###
+### ISO/FDIS 1951 lxmldump
+###
+import sys
+import signal
+import re
+from pathlib import Path
+import xml.etree.ElementTree as xmlET
+
+if sys.version_info < (3, 7):  # explicit check: asserts are skipped under -O
+    sys.exit(u"ERROR: Python 3.7 or newer is required.")
+
+###
+### Default settings
+###
+pkk_cfg = {
+    "verbosity": 1,  # threshold that pkk_print() compares message levels to
+    "dump": False,  # True (set by -d/--dump): dump whole entry trees
+}
+
+
+###
+### Misc. helper functions, etc
+###
+def pkk_cleanup():  # Cleanup hook; currently a no-op that returns 0
+    return 0
+
+
+## Print smsg if the configured verbosity is at least the given level
+def pkk_print(level, smsg):
+    if level <= pkk_cfg["verbosity"]:
+        print(smsg)
+
+
+## Fatal error handler: report smsg on stderr, then exit with status 1
+def pkk_fatal(smsg):
+    print(u"ERROR: "+ smsg, file=sys.stderr)
+    sys.exit(1)
+
+
+## Handler for SIGINT signals: run cleanup, print a notice, exit with 1
+def pkk_signal_handler(signal, frame):  # NOTE: 'signal' shadows the module
+    pkk_cleanup()
+    print(u"\nQuitting due to SIGINT / Ctrl+C!")
+    sys.exit(1)
+
+
+###
+### Main program starts
+###
+signal.signal(signal.SIGINT, pkk_signal_handler)
+
+
+### Parse arguments: "-" prefixed ones are options, the rest are input files
+pkk_show_help = False
+pkk_filenames = []
+argc = 1
+while argc < len(sys.argv):
+    arg = sys.argv[argc]
+
+    # Look ahead one argument; only options that take a value consume it
+    needs_param = False
+    param = sys.argv[argc + 1] if argc + 1 < len(sys.argv) else None
+
+    # Check for option type arg
+    if arg[0:1] == "-":
+        oarg = arg
+        # Any number of leading dashes is accepted ("-d", "--dump")
+        arg = arg.lstrip("-")
+
+        if arg == "help" or arg == "h":
+            pkk_show_help = True
+        elif arg == "dump" or arg == "d":
+            pkk_cfg["dump"] = True
+        elif arg == "v" or arg == "verbosity":
+            needs_param = True
+            if param is None:
+                pkk_fatal(u"Option '{0}' requires an argument.".format(oarg))
+            # Store as int like the default; pkk_print() compares it with ">="
+            try:
+                pkk_cfg["verbosity"] = int(param)
+            except ValueError:
+                pkk_fatal(u"Invalid verbosity value '{0}'.".format(param))
+        else:
+            pkk_fatal(u"Invalid option argument '{0}'.".format(oarg))
+    else:
+        # Non-option argument
+        pkk_filenames.append(arg)
+
+    # Step over the option's value too when one was consumed
+    argc += 2 if needs_param else 1
+
+
+### Show help if requested, or when no input files were given
+if pkk_show_help or not pkk_filenames:
+    print(u"lxmldump - Dump ISO/FDIS 1951 XML file data")
+    print(u"Usage: {0} <options> <input xml file(s)>".
+        format(Path(sys.argv[0]).name))
+    print(u"")
+    print(u"  -h,  --help              Show this help")
+#    print(u"  -v,  --verbosity <0-3>   Set verbosity")
+    print(u"  -d,  --dump              Dump mode")
+    print(u"")
+    sys.exit(0)
+
+
+
+
+###
+### Main
+###
+def pkk_recursive_dump(lnode, indent):
+    # Show the node's own text in quotes, unless it is missing or blank
+    text = (lnode.text or "").strip()
+    quoted = " \"" + text + "\"" if text else ""
+
+    # One line per element: indentation, tag, attribute dict, quoted text
+    print("{}{} {}{}".format("    " * indent, lnode.tag, lnode.attrib, quoted))
+    # Recurse into the direct child elements, one indent level deeper
+    for child in lnode.findall("./*"):
+        pkk_recursive_dump(child, indent + 1)
+
+
+for filename in pkk_filenames:
+    # Parse XML file into element tree
+    try:
+        uxml = xmlET.parse(filename)
+    except Exception as e:
+        pkk_fatal(u"XML parsing failed: {0}".format(str(e)))
+
+    # Dump output: full tree per entry, or one "headwords : definitions" line
+    try:
+        xroot = uxml.getroot()
+        for dnode in xroot.findall("./DictionaryEntry"):
+            if pkk_cfg["dump"]:
+                pkk_recursive_dump(dnode, 0)
+                print("\n\n")
+            else:
+                # Element.text is None for empty elements: use "", not "None"
+                wlist = []
+                dlist = []
+                for wnode in dnode.findall("./HeadwordCtn"):
+                    for qnode in wnode.findall("./SearchForm"):
+                        wlist.append((qnode.text or "").strip())
+                    for qnode in wnode.findall("./Definition"):
+                        dlist.append((qnode.text or "").strip())
+                for wnode in dnode.findall("./SenseGrp"):
+                    for qnode in wnode.findall("./Definition"):
+                        dlist.append((qnode.text or "").strip())
+
+                print("{} : {}".format(", ".join(wlist), " ; ".join(dlist)))
+    # Writing output failed (e.g. closed pipe): close stderr, exit with 1
+    except (BrokenPipeError, IOError):
+        sys.stderr.close()
+        sys.exit(1)
+
+pkk_cleanup()
+sys.exit(0)