changeset 36:4c8aafff8c5f

Refactor output handling to be (mostly) configurable.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 25 May 2021 23:19:04 +0300
parents 5aafa87dbec2
children e176fcfc0235
files lxmldump.py
diffstat 1 files changed, 141 insertions(+), 57 deletions(-) [+]
line wrap: on
line diff
--- a/lxmldump.py	Tue May 25 13:28:24 2021 +0300
+++ b/lxmldump.py	Tue May 25 23:19:04 2021 +0300
@@ -1,7 +1,7 @@
 #!/usr/bin/python3 -B
 # coding=utf-8
 ###
-### lxmldump - Dump ISO/FDIS 1951 XML file data
+### lxmldump - Convert and dump ISO/FDIS 1951 XML file data
 ### Programmed and designed by Matti 'ccr' Hämäläinen <ccr@tnsp.org>
 ### (C) Copyright 2021 Tecnic Software productions (TNSP)
 ###
@@ -39,10 +39,56 @@
 }
 
 
-# Default Ptr URL format strings
-pkk_ptr_url_fmt = {
-    PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>",
-    PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>",
+pkk_mode_defaults = {  # setting ID -> { mode: default }, normal mode is the fallback
+    # Default Ptr URL format strings ({href} = link target, {text} = link text)
+    "ptr_url_fmt": {
+        PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>",
+        PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>",
+    },
+
+    "word_fmt": {  # each "Headword" element, {text} = its text
+        PKK_MODE_NORMAL: "\"{text}\"",
+    },
+    "word_attr_sep": {  # separator joining the headword attributes
+        PKK_MODE_NORMAL: " ; ",
+        PKK_MODE_ANKI: ":",
+    },
+    "word_attr_fmt": {  # attribute list after the headword, {alist} = joined attributes
+        PKK_MODE_NORMAL: " ({alist})",
+    },
+
+    "word_eol": {  # printed after the headword and its attributes
+        PKK_MODE_NORMAL: "\n",
+    },
+
+    "sense_index": {  # heading of each "SenseGrp", {index} counts from 1
+        PKK_MODE_NORMAL: "sense #{index}\n",
+    },
+
+    "search_fmt": {  # each "SearchForm" element, {text} = its text
+        PKK_MODE_NORMAL: "srch \"{text}\"\n",
+    },
+
+    "definition_fmt": {  # each "Definition" element, {text} = its text
+        PKK_MODE_NORMAL: "defn \"{text}\"\n",
+    },
+
+    "example_fmt": {  # each example, {text} = example text, {geostr} = one of the two below
+        PKK_MODE_NORMAL: "exmp \"{text}\"{geostr}\n",
+    },
+    "example_geo_list": {  # {geostr} when geographical usages exist, {glist} = joined usages
+        PKK_MODE_NORMAL: " ({glist})",
+    },
+    "example_geo_empty": {  # {geostr} when there are no geographical usages
+        PKK_MODE_NORMAL: "",
+    },
+    "example_geo_sep": {  # separator joining the geographical usages
+        PKK_MODE_NORMAL: ", ",
+    },
+
+    "word_end": {  # printed after the main sense and any other senses
+        PKK_MODE_NORMAL: "\n",
+    },
+}
 
 
@@ -69,6 +115,8 @@
 ]
 
 
+pkk_settings = {}  # setting ID -> value given with -s/--set, filled by pkk_set_value
+
 
 ###
 ### Misc. helper functions, etc
@@ -107,6 +155,56 @@
     sys.exit(1)
 
 
+## argparse action for "-s id=value": store a value override in pkk_settings
+class pkk_set_value(argparse.Action):
+    # "-" is accepted in the ID so that e.g. word-fmt maps to word_fmt below
+    rexpr = re.compile(r'\s*([\w-]+)\s*=\s*(.*)\s*')
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        rmatch = self.rexpr.match(values)
+        if rmatch:
+            rid = rmatch.group(1).lower().replace("-", "_")
+            rval = rmatch.group(2)
+            if rid in pkk_mode_defaults:
+                pkk_settings[rid] = rval
+            else:
+                pkk_fatal(f"Invalid option '{option_string} {values}': No such ID '{rid}'.")
+        else:
+            pkk_fatal(f"Invalid option '{option_string} {values}': Expected id=value.")
+
+
+## Pick the mode key to use for given ID, or None if there is none
+def pkk_test_value(mid):
+    # Unknown setting ID
+    if mid not in pkk_mode_defaults:
+        return None
+
+    # Prefer the entry of the active mode, then fall back to normal mode
+    mdefaults = pkk_mode_defaults[mid]
+    for mmode in (pkk_cfg.mode, PKK_MODE_NORMAL):
+        if mmode in mdefaults:
+            return mmode
+
+    # ID has no entry for either mode
+    return None
+
+
+## Get value for ID: value set with -s/--set if any, else per-mode default
+def pkk_get_value(mid):
+    mval = pkk_settings.get(mid)
+    if mval is not None:
+        return mval
+
+    mmode = pkk_test_value(mid)
+    if mmode is None:
+        pkk_fatal(f"Internal error: No mode for ID '{mid}'.")
+    return pkk_mode_defaults[mid][mmode]
+
+
+def pkk_get_fmt(mid):  # like pkk_get_value(), but backslash-n sequences become newlines
+    return pkk_get_value(mid).replace("\\n", "\n")
+
+
 ## Annotate given string with prefix and suffix based on tag
 def pkk_str_annotate(mtag, mstr):
     if pkk_cfg.annotate and mtag in pkk_element_annotation_map:
@@ -127,16 +225,7 @@
 
 ## Format a "Ptr" node as text
 def pkk_ptr_to_text(pnode):
-    # If custom format set, use it
-    if pkk_cfg.ptr_url_fmt != None:
-        pfmt = pkk_cfg.ptr_url_fmt
-    elif pkk_cfg.mode in pkk_ptr_url_fmt:
-        # Else try mode-specific
-        pfmt = pkk_ptr_url_fmt[pkk_cfg.mode]
-    else:
-        # Last resort is normal mode format
-        pfmt = pkk_ptr_url_fmt[PKK_MODE_NORMAL]
-
+    pfmt = pkk_get_fmt("ptr_url_fmt")
     return pfmt.format(
         text=("".join(pnode.itertext())).strip(),
         href=pnode.attrib["{http://www.w3.org/TR/xlink}href"])
@@ -182,34 +271,31 @@
 
 
 ## Output item(s) under given node with given format string
-def pkk_output_subs_fmt(indent, dnode, dsub, dname, dfmt):
+def pkk_output_subs(indent, dnode, dsub, dfmtname):
+    dfmt = pkk_get_fmt(dfmtname)
     for qnode in dnode.findall(dsub):
-        pkk_printi(indent, dfmt.format(nname=dname, ntext=pkk_node_to_text(qnode)))
-
-## Output item(s) under given node with a prefixed name string
-def pkk_output_subs_prefix(indent, dnode, dsub, dname):
-    pkk_output_subs_fmt(indent, dnode, dsub, dname, "{nname} \"{ntext}\"\n")
+        pkk_printi(indent, dfmt.format(text=pkk_node_to_text(qnode)))
 
 
 ## Output a main "Headword" or "Sense" node under it
 def pkk_output_sense(indent, dnode):
     # Search form and definition
-    pkk_output_subs_prefix(indent, dnode, "./SearchForm", "srch")
-    pkk_output_subs_prefix(indent, dnode, "./Definition", "defn")
+    pkk_output_subs(indent, dnode, "./SearchForm", "search_fmt")
+    pkk_output_subs(indent, dnode, "./Definition", "definition_fmt")
 
     # Examples
     for wnode in dnode.findall("./ExampleBlock/ExampleCtn"):
-        sstr = pkk_node_to_text(wnode.find("./Example"))
-        lstr = ""
-
-        ltmp = []
+        geolist = []
         for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"):
-            ltmp.append("{} [{}]".format(pkk_node_to_text(qnode), qnode.attrib["class"]))
+            geolist.append("{} [{}]".format(pkk_node_to_text(qnode), qnode.attrib["class"]))
 
-        if len(ltmp) > 0:
-            lstr = " ({})".format(", ".join(ltmp))
+        if len(geolist) > 0:
+            geostr = pkk_get_fmt("example_geo_list").format(glist=pkk_get_fmt("example_geo_sep").join(geolist))
+        else:
+            geostr = pkk_get_fmt("example_geo_empty")
 
-        pkk_printi(indent + 1, "{} \"{}\"{}\n".format("exmp", sstr, lstr))
+        pkk_printi(indent + 1, pkk_get_fmt("example_fmt").format(
+            text=pkk_node_to_text(wnode.find("./Example")), geostr=geostr))
 
 
 ## Output one "DictionaryEntry" node
@@ -229,12 +315,13 @@
         tmpl.sort(reverse=False, key=lambda attr: (attr, len(attr)))
 
         # Print the headword and attributes if any
-        pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{ntext}\"")
+        pkk_output_subs(indent, wnode, "./Headword", "word_fmt")
 
         if len(tmpl) > 0:
-            pkk_print(" ({nlist})".format(nlist=pkk_cfg.word_attr_sep.join(tmpl)))
+            pkk_print(pkk_get_fmt("word_attr_fmt").format(
+                alist=pkk_get_fmt("word_attr_sep").join(tmpl)))
 
-        pkk_print("\n")
+        pkk_print(pkk_get_fmt("word_eol"))
 
         # Print main "sense"
         pkk_output_sense(indent + 1, wnode)
@@ -242,10 +329,12 @@
         # Print any other "senses"
         index = 1
         for wnode in dnode.findall("./SenseGrp"):
-            pkk_printi(indent + 1, f"sense #{index}\n")
+            pkk_printi(indent + 1, pkk_get_fmt("sense_index").format(index=index))
             pkk_output_sense(indent + 2, wnode)
             index += 1
 
+        pkk_print(pkk_get_fmt("word_end"))
+
 
 ###
 ### Main program starts
@@ -253,7 +342,7 @@
 signal.signal(signal.SIGINT, pkk_signal_handler)
 
 optparser = argparse.ArgumentParser(
-    description="lxmldump - Dump ISO/FDIS 1951 XML file data",
+    description="lxmldump - Convert and dump ISO/FDIS 1951 XML file data",
     usage="%(prog)s [options] <input xml file(s)>",
     add_help=False
     )
@@ -284,18 +373,10 @@
     help=argparse.SUPPRESS)
 #    help="output Anki compatible")
 
-optparser.add_argument("--ptr-url-fmt",
-    dest="ptr_url_fmt",
-    type=str,
-    default=None,
-    metavar="str",
-    help='Ptr URL format string (see below)')
-
-optparser.add_argument("--attr-sep",
-    dest="word_attr_sep",
-    type=str, default=" ; ",
-    metavar="str",
-    help='word attribute separator (default: \"%(default)s\")')
+optparser.add_argument("-s", "--set",
+    action=pkk_set_value,
+    metavar="id=val",
+    help='set value (see below)')
 
 optparser.add_argument("-n", "--normalize",
     dest="normalize",
@@ -311,7 +392,7 @@
     dest="indent",
     type=int, choices=range(0, 32), default=4,
     metavar="n",
-    help='indent output by <n> characters (default: %(default)s)')
+    help='set indentation level (default: %(default)s)')
 
 optparser.add_argument("-p", "--debug",
     dest="debug",
@@ -319,15 +400,18 @@
     help=argparse.SUPPRESS)
 
 
+### Parse arguments
+pkk_cfg = optparser.parse_args()
+
+
 ### Show help if needed
-pkk_cfg = optparser.parse_args()
 if len(pkk_cfg.filenames) == 0 or pkk_cfg.show_help:
     optparser.print_help()
 
-    print(u"\nDefault Ptr format strings per mode:")
-    for pmode in pkk_modes_list:
-        if pmode in pkk_ptr_url_fmt:
-            print(u" {:6s} : \"{}\"".format(pkk_modes_list[pmode], pkk_ptr_url_fmt[pmode]))
+#    print(u"\nDefault Ptr format strings per mode:")
+#    for pmode in pkk_modes_list:
+#        if pmode in pkk_ptr_url_fmt:
+#            print(u" {:6s} : \"{}\"".format(pkk_modes_list[pmode], pkk_ptr_url_fmt[pmode]))
 
     print(u"")
     sys.exit(0)
@@ -353,18 +437,18 @@
                 try:
                     pkk_output_node(0, dnode)
                 except Exception as e:
+                    print("")
                     pkk_dump_recursive(0, dnode)
                     print(str(e))
                     sys.exit(0)
             elif pkk_cfg.mode == PKK_MODE_DUMP:
                 pkk_dump_recursive(0, dnode)
+                print("")
             elif pkk_cfg.mode == PKK_MODE_XML:
-                pkk_print(str(xmlET.tostring(dnode, encoding="utf8")) + "\n")
+                pkk_print(str(xmlET.tostring(dnode, encoding="utf8")) + "\n\n")
             else:
                 pkk_fatal("Invalid operation mode?")
 
-            print("\n")
-
     except (BrokenPipeError, IOError) as e:
         sys.stderr.close()
         sys.exit(1)