changeset 30:34755af2ea1f

Make Ptr field URL formatting configurable.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 25 May 2021 12:40:03 +0300
parents f91ef7d7615b
children 4cbefe4c6f53
files lxmldump.py
diffstat 1 files changed, 32 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/lxmldump.py	Tue May 25 12:39:04 2021 +0300
+++ b/lxmldump.py	Tue May 25 12:40:03 2021 +0300
@@ -30,6 +30,19 @@
 PKK_MODE_XML        = 2
 
 
+pkk_modes_list = {  # maps each PKK_MODE_* constant to its name string
+    PKK_MODE_NORMAL: "normal",
+    PKK_MODE_DUMP: "dump",
+    PKK_MODE_XML: "xml",
+}
+
+
+# Default Ptr format strings by mode; fields are {href} and {text}
+pkk_ptr_url_fmt = {
+    PKK_MODE_NORMAL: "<PTR:{href}>{text}</PTR>",
+}
+
+
 # Element annotation mappings
 pkk_element_annotation_map = {
     "Fragment" : {
@@ -109,10 +122,19 @@
 
 ## Format a "Ptr" node as text
 def pkk_ptr_to_text(pnode):
-    phref = pnode.attrib["{http://www.w3.org/TR/xlink}href"]
-    ptext = ("".join(pnode.itertext())).strip()
+    # Pick the format string: an explicitly configured one wins
+    if pkk_cfg.ptr_url_fmt is not None:
+        pfmt = pkk_cfg.ptr_url_fmt
+    elif pkk_cfg.mode in pkk_ptr_url_fmt:
+        # Otherwise use the default registered for the current mode
+        pfmt = pkk_ptr_url_fmt[pkk_cfg.mode]
+    else:
+        # No mode-specific default, fall back to normal mode format
+        pfmt = pkk_ptr_url_fmt[PKK_MODE_NORMAL]
 
-    return f"<PTR:{phref}>{ptext}</PTR>"
+    # Fields: {text} = stripped node text, {href} = xlink href attribute
+    return pfmt.format(text=("".join(pnode.itertext())).strip(),
+        href=pnode.attrib["{http://www.w3.org/TR/xlink}href"])
 
 
 ## Get text inside a given node
@@ -246,6 +268,13 @@
     action="store_const", const=PKK_MODE_XML,
     help="output as XML")
 
+optparser.add_argument("--ptr-url-fmt",
+    dest="ptr_url_fmt",
+    type=str,
+    default=None,  # None presumably selects the pkk_ptr_url_fmt defaults
+    metavar="str",
+    help='Ptr URL format string (see below)')
+
 optparser.add_argument("-n", "--normalize",
     dest="normalize",
     action="store_true",