changeset 13:3bd772fd6a50

Cleanups.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 11 May 2021 12:49:15 +0300
parents d50e71642be7
children 7498bda8b4a2
files lxmldump.py
diffstat 1 files changed, 15 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/lxmldump.py	Tue May 11 11:46:42 2021 +0300
+++ b/lxmldump.py	Tue May 11 12:49:15 2021 +0300
@@ -54,7 +54,7 @@
     return 0
 
 
-## Wrapper for print()
+## Print string to stdout using normalized Unicode if enabled
 def pkk_print(smsg):
     try:
         if pkk_cfg["normalize"]:
@@ -66,10 +66,12 @@
         sys.stderr.close()
 
 
+## Print string with indentation
 def pkk_printi(indent, smsg):
     pkk_print(("    " * indent) + smsg)
 
 
+## Check value against current verbosity level
 def pkk_verbosity(lvl):
     return pkk_cfg["verbosity"] >= lvl
 
@@ -87,17 +89,19 @@
     sys.exit(1)
 
 
-##
+## Clean string by removing tabs and newlines
 def pkk_str_clean(mstr):
     return re.sub(r'[\n\r\t]', '', mstr)
 
 
+## Format "Ptr" node as text
 def pkk_ptr_to_text(pnode):
     return "PTR: <{}>{}</>".format(
         pnode.attrib["{http://www.w3.org/TR/xlink}href"],
         ("".join(pnode.itertext())).strip())
 
 
+## Get text inside a given node
 def pkk_get_text(lnode):
     stmp = ""
     for pnode in lnode.iter():
@@ -117,7 +121,7 @@
     return stmp.strip()
 
 
-##
+## Simple recursive dump starting at given node
 def pkk_dump_recursive(indent, lnode):
     if lnode.tag in ["Example"]:
         stmp = pkk_get_text(lnode)
@@ -140,20 +144,19 @@
             pkk_dump_recursive(indent + 1, qnode)
 
 
-##
-def pkk_output_one(indent, dnode, dsub, dfmt):
+## Output item under given node
+def pkk_output_subs_fmt(indent, dnode, dsub, dname, dfmt):
     for qnode in dnode.findall(dsub):
-        pkk_printi(indent, dfmt.format(pkk_get_text(qnode)))
+        pkk_printi(indent, dfmt.format(dname, pkk_get_text(qnode)))
 
 
-def pkk_output_subs(indent, dnode, dsub, dname):
-    for qnode in dnode.findall(dsub):
-        pkk_printi(indent, "{} \"{}\"\n".format(dname, pkk_get_text(qnode)))
+def pkk_output_subs_prefix(indent, dnode, dsub, dname):
+    pkk_output_subs_fmt(indent, dnode, dsub, dname, "{0} \"{1}\"\n")
 
 
 def pkk_output_sense(indent, dnode):
-    pkk_output_subs(indent, dnode, "./SearchForm", "srch")
-    pkk_output_subs(indent, dnode, "./Definition", "defn")
+    pkk_output_subs_prefix(indent, dnode, "./SearchForm", "srch")
+    pkk_output_subs_prefix(indent, dnode, "./Definition", "defn")
 
     for wnode in dnode.findall("./ExampleBlock/ExampleCtn"):
         sstr = pkk_get_text(wnode.find("./Example"))
@@ -173,7 +176,7 @@
 def pkk_output_node(indent, dnode):
 
     for wnode in dnode.findall("./HeadwordCtn"):
-        pkk_output_one (indent, wnode, "./Headword", "\"{}\":\n")
+        pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\":\n")
         pkk_output_sense(indent + 1, wnode)
 
         index = 1