changeset 60:cbed8ee15701

Cleanups.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 31 May 2021 13:59:16 +0300
parents d52696972bf2
children 9c36574199f5
files lxmldump.py
diffstat 1 files changed, 9 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/lxmldump.py	Mon May 31 13:28:48 2021 +0300
+++ b/lxmldump.py	Mon May 31 13:59:16 2021 +0300
@@ -268,14 +268,17 @@
 
 ## Clean string by removing tabs and newlines
 def pkk_str_clean(mstr):
-    return re.sub(r'[\n\r\t]', '', mstr)
+    return re.sub(r'[\n\r\t]', '', mstr).strip()
 
 
 ## Format a "Ptr" node as text
 def pkk_ptr_to_text(pnode):
     pfmt = pkk_get_fmt("ptr_fmt")
+    ptext = ("".join(pnode.itertext())).strip()
+    if pkk_cfg.annotate:
+        ptext = re.sub(r'\s*\.\s*$', '', ptext)
     return pfmt.format(
-        text=re.sub(r'\.$', '', ("".join(pnode.itertext())).strip()),
+        text=ptext,
         href=pnode.attrib["{http://www.w3.org/TR/xlink}href"])
 
 
@@ -287,12 +290,12 @@
             stmp += pkk_ptr_to_text(pnode)
         else:
             if isinstance(pnode.text, str):
-                stmp += pkk_str_annotate(pnode.tag, pkk_str_clean(pnode.text))
+                stmp += pkk_str_annotate(pnode.tag, pnode.text)
 
             if isinstance(pnode.tail, str):
-                stmp += pkk_str_clean(pnode.tail)
+                stmp += pnode.tail
 
-    return re.sub(r'\.$', '', stmp.strip())
+    return pkk_str_clean(re.sub(r'\s*\.\s*$', '', stmp))
 
 
 ## Simple recursive dump starting at given node
@@ -302,7 +305,7 @@
         pkk_printi(indent, f"{lnode.tag} \"{stmp}\"\n")
     else:
         if isinstance(lnode.text, str):
-            textstr = pkk_str_clean(lnode.text).strip()
+            textstr = pkk_str_clean(lnode.text)
             if textstr != "":
                 textstr = " \""+ textstr +"\""
         else: