changeset 5:274b2091137c

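Some more work on cleaning this up: drop the verbosity option, add a --normalize (-n) option for NFC-normalized output, rename pkk_recursive_dump() to pkk_dump_simple_node() and move entry printing into pkk_dump_node().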
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 10 May 2021 21:38:11 +0300
parents 60b789dfee32
children 34a89d61dbe7
files lxmldump.py
diffstat 1 files changed, 31 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/lxmldump.py	Tue May 04 13:58:45 2021 +0300
+++ b/lxmldump.py	Mon May 10 21:38:11 2021 +0300
@@ -12,6 +12,7 @@
 import re
 from pathlib import Path
 import xml.etree.ElementTree as xmlET
+import unicodedata
 
 assert sys.version_info >= (3, 7)
 
@@ -20,8 +21,8 @@
 ### Default settings
 ###
 pkk_cfg = {
-    "verbosity": 1,
     "dump": False,
+    "normalize": False,
 }
 
 
@@ -33,9 +34,11 @@
 
 
 ## Wrapper for print()
-def pkk_print(level, smsg):
-    if pkk_cfg["verbosity"] >= level:
-        print(smsg)
+def pkk_print(smsg):
+    if pkk_cfg["normalize"]:
+        sys.stdout.write(unicodedata.normalize("NFC", smsg))
+    else:
+        sys.stdout.write(smsg)
 
 
 ## Fatal error handler
@@ -79,9 +82,8 @@
             pkk_show_help = True
         elif arg == "dump" or arg == "d":
             pkk_cfg["dump"] = True
-        elif arg == "v" or arg == "verbosity":
-            needs_param = True
-            pkk_cfg["verbosity"] = param
+        elif arg == "normalize" or arg == "n":
+            pkk_cfg["normalize"] = True
         else:
             pkk_fatal(u"Invalid option argument '{0}'.".format(oarg))
 
@@ -104,27 +106,41 @@
         format(str(Path(sys.argv[0]).name)))
     print(u"")
     print(u"       --help              Show this help")
-#    print(u"  -v,  --verbosity <0-3>   Set verbosity")
     print(u"  -d,  --dump              Dump mode")
     print(u"")
     sys.exit(0)
 
 
-
-
 ###
 ### Main
 ###
-def pkk_recursive_dump(lnode, indent):
+def pkk_dump_simple_node(lnode, indent):
     stmp = ""
     if lnode.text != None:
         tmp = str(lnode.text).strip()
         if tmp != "":
             stmp = " \""+ tmp +"\""
 
-    print("{}{} {}{}".format("    " * indent, lnode.tag, lnode.attrib, stmp))
+    pkk_print("{}{} {}{}".format("    " * indent, lnode.tag, lnode.attrib, stmp))
     for qnode in lnode.findall("./*"):
-        pkk_recursive_dump(qnode, indent + 1)
+        pkk_dump_simple_node(qnode, indent + 1)
+
+
+def pkk_dump_node(dnode):
+    wlist = []
+    dlist = []
+    for wnode in dnode.findall("./HeadwordCtn"):
+        for qnode in wnode.findall("./SearchForm"):
+            wlist.append(str(qnode.text).strip())
+
+        for qnode in wnode.findall("./Definition"):
+            dlist.append(str(qnode.text).strip())
+
+    for wnode in dnode.findall("./SenseGrp"):
+        for qnode in wnode.findall("./Definition"):
+            dlist.append(str(qnode.text).strip())
+
+    pkk_print("{} : {}".format(", ".join(wlist), " ; ".join(dlist)))
 
 
 for filename in pkk_filenames:
@@ -139,22 +155,10 @@
         xroot = uxml.getroot()
         for dnode in xroot.findall("./DictionaryEntry"):
             if pkk_cfg["dump"]:
-                pkk_recursive_dump(dnode, 0)
+                pkk_dump_simple_node(dnode, 0)
                 print("\n\n")
             else:
-                wlist = []
-                dlist = []
-                for wnode in dnode.findall("./HeadwordCtn"):
-                    for qnode in wnode.findall("./SearchForm"):
-                        wlist.append(str(qnode.text).strip())
-                    for qnode in wnode.findall("./Definition"):
-                        dlist.append(str(qnode.text).strip())
-
-                for wnode in dnode.findall("./SenseGrp"):
-                    for qnode in wnode.findall("./Definition"):
-                        dlist.append(str(qnode.text).strip())
-
-                print("{} : {}".format(", ".join(wlist), " ; ".join(dlist)))
+                pkk_dump_node(dnode)
 
     except (BrokenPipeError, IOError) as e:
         sys.stderr.close()