changeset 25:8a6738f67106

Factor string annotation into separate function.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 25 May 2021 10:58:19 +0300
parents 8c8e8e4504bb
children 420f13925f20
files lxmldump.py
diffstat 1 files changed, 25 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/lxmldump.py	Tue May 25 10:50:02 2021 +0300
+++ b/lxmldump.py	Tue May 25 10:58:19 2021 +0300
@@ -24,8 +24,17 @@
 ###
 ### Default settings
 ###
-pkk_str_fmap = {
-    "Fragment" : ["<", ">"],
+# Operation modes (also used as keys in the per-element annotation maps below)
+PKK_MODE_NORMAL     = 0
+PKK_MODE_DUMP       = 1
+PKK_MODE_XML        = 2
+
+
+# Element annotation mappings: element tag -> { mode: [prefix, suffix] }
+pkk_element_annotation_map = {
+    "Fragment" : {
+        PKK_MODE_NORMAL: ["<", ">"],
+    },
 }
 
 
@@ -41,11 +50,6 @@
 ]
 
 
-# Operation modes
-PKK_MODE_NORMAL     = 0
-PKK_MODE_DUMP       = 1
-PKK_MODE_XML        = 2
-
 
 ###
 ### Misc. helper functions, etc
@@ -84,6 +88,19 @@
     sys.exit(1)
 
 
+## Annotate string: wrap mtext in the markers mapped to element tag mtag, if any
+def pkk_str_annotate(mtag, mtext):
+    if pkk_cfg.annotate and mtag in pkk_element_annotation_map:
+        if pkk_cfg.mode in pkk_element_annotation_map[mtag]:
+            mmode = pkk_cfg.mode  # the tag defines markers for the active mode
+        else:
+            mmode = PKK_MODE_NORMAL  # fall back; KeyError below if the tag has no normal-mode entry
+
+        return pkk_element_annotation_map[mtag][mmode][0] + mtext + pkk_element_annotation_map[mtag][mmode][1]  # [0] is prepended, [1] appended
+    else:
+        return mtext  # annotation disabled or tag not mapped: text passes through unchanged
+
+
 ## Clean string by removing tabs and newlines
 def pkk_str_clean(mstr):
     return re.sub(r'[\n\r\t]', '', mstr)
@@ -104,11 +121,7 @@
             stmp += pkk_ptr_to_text(pnode)
         else:
             if isinstance(pnode.text, str):
-                ptext = pkk_str_clean(pnode.text).strip()
-                if pkk_cfg.annotate and isinstance(pnode.tag, str) and pnode.tag in pkk_str_fmap:
-                    stmp += pkk_str_fmap[pnode.tag][0] + ptext + pkk_str_fmap[pnode.tag][1]
-                else:
-                    stmp += ptext
+                stmp += pkk_str_annotate(pnode.tag, pkk_str_clean(pnode.text).strip())
 
             if isinstance(pnode.tail, str):
                 stmp += pkk_str_clean(pnode.tail)