changeset 10:013f0cd9e5b3

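More work: replace the boolean "dump" setting with an integer "mode" (0 = pkk_output_node, 1 = pkk_dump_recursive, 2 = raw XML via xmlET.tostring), add an "xml"/"x" option, strip newline/CR/tab characters from node text with the new pkk_str_clean(), and print PTR nodes with their xlink href and inner text.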
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 11 May 2021 11:27:47 +0300
parents 2a8c65d22f86
children b2ac68732e45
files lxmldump.py
diffstat 1 files changed, 23 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/lxmldump.py	Tue May 11 09:33:21 2021 +0300
+++ b/lxmldump.py	Tue May 11 11:27:47 2021 +0300
@@ -24,8 +24,9 @@
     "verbosity": 3,
 
     "annotate": False,
-    "dump": False,
+    "mode": 0,
     "normalize": False,
+    "xml": False,
 
     "debug": False,
 }
@@ -82,9 +83,15 @@
 
 
 ##
+def pkk_str_clean(mstr):
+    return re.sub(r'[\n\r\t]', '', mstr)
+
+
 def pkk_ptr_to_text(pnode):
-#    return "PTR: <{}>".format("".join(pnode.itertext()).strip())
-    return "PTR: <{}>".format(xmlET.tostring(pnode))
+#    return "PTR: {}".format(xmlET.tostring(pnode))
+    return "PTR: <{}>{}</>".format(
+        pnode.attrib["{http://www.w3.org/TR/xlink}href"],
+        ("".join(pnode.itertext())).strip())
 
 
 def pkk_get_text(lnode):
@@ -94,14 +101,14 @@
             stmp += pkk_ptr_to_text(pnode)
         else:
             if isinstance(pnode.text, str):
-                ptext = pnode.text
+                ptext = pkk_str_clean(pnode.text).strip()
                 if pkk_cfg["annotate"] and isinstance(pnode.tag, str) and pnode.tag in pkk_str_fmap:
                     stmp += pkk_str_fmap[pnode.tag][0] + ptext + pkk_str_fmap[pnode.tag][1]
                 else:
                     stmp += ptext
 
             if isinstance(pnode.tail, str):
-                stmp += " "+ pnode.tail.strip()
+                stmp += pkk_str_clean(pnode.tail)
 
     return stmp.strip()
 
@@ -113,14 +120,14 @@
         pkk_printi(indent, "{} \"{}\"\n".format(lnode.tag, stmp))
     else:
         if isinstance(lnode.text, str):
-            stmp = lnode.text.strip()
+            stmp = pkk_str_clean(lnode.text).strip()
             if stmp != "":
                 stmp = " \""+ stmp +"\""
         else:
             stmp = ""
 
         if len(lnode.attrib) > 0:
-            atmp = " "+str(lnode.attrib)
+            atmp = " "+ str(lnode.attrib)
         else:
             atmp = ""
 
@@ -198,7 +205,9 @@
         if arg == "help" or arg == "h":
             pkk_show_help = True
         elif arg == "dump" or arg == "d":
-            pkk_cfg["dump"] = True
+            pkk_cfg["mode"] = 1
+        elif arg == "xml" or arg == "x":
+            pkk_cfg["mode"] = 2
         elif arg == "normalize" or arg == "n":
             pkk_cfg["normalize"] = True
         elif arg == "annotate" or arg == "a":
@@ -263,10 +272,14 @@
             if pkk_cfg["debug"] and dnode.attrib["identifier"] not in pkk_debug_list:
                 continue
 
-            if pkk_cfg["dump"]:
+            if pkk_cfg["mode"] == 0:
+                pkk_output_node(0, dnode)
+            elif pkk_cfg["mode"] == 1:
                 pkk_dump_recursive(0, dnode)
+            elif pkk_cfg["mode"] == 2:
+                pkk_print(str(xmlET.tostring(dnode, encoding="utf8")) + "\n")
             else:
-                pkk_output_node(0, dnode)
+                pkk_fatal("Invalid operation mode?")
 
             print("\n")