changeset 40:bc8d8ef4a248

More work on output flexibility.
author Matti Hamalainen <ccr@tnsp.org>
date Wed, 26 May 2021 02:02:55 +0300
parents f53ea742b57f
children 98c85c0b5159
files lxmldump.py
diffstat 1 files changed, 48 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/lxmldump.py	Wed May 26 01:07:04 2021 +0300
+++ b/lxmldump.py	Wed May 26 02:02:55 2021 +0300
@@ -36,7 +36,7 @@
     PKK_MODE_NORMAL: "normal",
     PKK_MODE_DUMP: "dump",
     PKK_MODE_XML: "xml",
-#    PKK_MODE_ANKI: "anki",
+    PKK_MODE_ANKI: "anki",
 }
 
 
@@ -48,41 +48,46 @@
     },
 
     "word_fmt": {
-        PKK_MODE_NORMAL: "\"{text}\"",
+        PKK_MODE_NORMAL: "\"{word}\"{attr}\n",
+        PKK_MODE_ANKI: "{word}{attr}\n",
     },
-    "word_attr_fmt": {
-        PKK_MODE_NORMAL: " ({alist})",
+    "word_attr_list": {
+        PKK_MODE_NORMAL: " ({alist}) ",
     },
-    "word_attr_sep": {
+    "word_attr_list_empty": {
+        PKK_MODE_NORMAL: " ",
+    },
+    "word_attr_list_sep": {
         PKK_MODE_NORMAL: " ; ",
-        PKK_MODE_ANKI: ":",
-    },
-    "word_eol": {
-        PKK_MODE_NORMAL: "\n",
+        PKK_MODE_ANKI: " : ",
     },
 
     "sense_index": {
-        PKK_MODE_NORMAL: "sense #{index}\n",
+        PKK_MODE_NORMAL: "{indent}sense #{index}\n",
+        PKK_MODE_ANKI: "[{index}]:\n",
     },
 
     "search_fmt": {
-        PKK_MODE_NORMAL: "srch \"{text}\"\n",
+        PKK_MODE_NORMAL: "{indent}srch \"{text}\"\n",
+        PKK_MODE_ANKI: "{text}\n",
     },
 
     "definition_fmt": {
-        PKK_MODE_NORMAL: "defn \"{text}\"\n",
+        PKK_MODE_NORMAL: "{indent}defn \"{text}\"\n",
+        PKK_MODE_ANKI: "? {text}\n",
     },
 
     "example_fmt": {
-        PKK_MODE_NORMAL: "exmp \"{text}\"{geostr}\n",
+        PKK_MODE_NORMAL: "{indent}exmp \"{text}\"{geostr}\n",
+        PKK_MODE_ANKI: "- {text}{geostr}\n",
     },
     "example_geo_list": {
         PKK_MODE_NORMAL: " ({glist})",
     },
-    "example_geo_empty": {
+    "example_geo_list_empty": {
         PKK_MODE_NORMAL: "",
     },
-    "example_geo_sep": {
+    "example_geo_list_sep": {
         PKK_MODE_NORMAL: ", ",
     },
 
@@ -137,9 +142,14 @@
         sys.stderr.close()
 
 
+## Get indentation string
+def pkk_geti(indent):
+    return " " * pkk_cfg.indent * indent
+
+
 ## Print string with indentation
 def pkk_printi(indent, smsg):
-    pkk_print((" " * pkk_cfg.indent * indent) + smsg)
+    pkk_print(pkk_geti(indent) + smsg)
 
 
 ## Fatal error handler
@@ -285,7 +295,9 @@
 def pkk_output_subs(indent, dnode, dsub, dfmtname):
     dfmt = pkk_get_fmt(dfmtname)
     for qnode in dnode.findall(dsub):
-        pkk_printi(indent, dfmt.format(text=pkk_node_to_text(qnode)))
+        pkk_print(dfmt.format(
+            text=pkk_node_to_text(qnode),
+            indent=pkk_geti(indent)))
 
 
 ## Output a main "Headword" or "Sense" node under it
@@ -301,12 +313,14 @@
             geolist.append("{} [{}]".format(pkk_node_to_text(qnode), qnode.attrib["class"]))
 
         if len(geolist) > 0:
-            geostr = pkk_get_fmt("example_geo_list").format(glist=pkk_get_fmt("example_geo_sep").join(geolist))
+            geostr = pkk_get_fmt("example_geo_list").format(glist=pkk_get_fmt("example_geo_list_sep").join(geolist))
         else:
-            geostr = pkk_get_fmt("example_geo_empty")
+            geostr = pkk_get_fmt("example_geo_list_empty")
 
-        pkk_printi(indent + 1, pkk_get_fmt("example_fmt").format(
-            text=pkk_node_to_text(wnode.find("./Example")), geostr=geostr))
+        pkk_print(pkk_get_fmt("example_fmt").format(
+            text=pkk_node_to_text(wnode.find("./Example")),
+            geostr=geostr,
+            indent=pkk_geti(indent + 1)))
 
 
 ## Output one "DictionaryEntry" node
@@ -325,14 +339,17 @@
         tmpl = list(set(tmpl))
         tmpl.sort(reverse=False, key=lambda attr: (attr, len(attr)))
 
-        # Print the headword and attributes if any
-        pkk_output_subs(indent, wnode, "./Headword", "word_fmt")
+        if len(tmpl) > 0:
+            astr = pkk_get_fmt("word_attr_list").format(
+                alist=pkk_get_fmt("word_attr_list_sep").join(tmpl))
+        else:
+            astr = pkk_get_fmt("word_attr_list_empty")
 
-        if len(tmpl) > 0:
-            pkk_print(pkk_get_fmt("word_attr_fmt").format(
-                alist=pkk_get_fmt("word_attr_sep").join(tmpl)))
-
-        pkk_print(pkk_get_fmt("word_eol"))
+        # Print the headword and attributes if any
+        pkk_print(pkk_get_fmt("word_fmt").format(
+            word=pkk_node_to_text(wnode.find("./Headword")),
+            attr=astr,
+            indent=pkk_geti(indent)))
 
         # Print main "sense"
         pkk_output_sense(indent + 1, wnode)
@@ -340,7 +357,9 @@
         # Print any other "senses"
         index = 1
         for wnode in dnode.findall("./SenseGrp"):
-            pkk_printi(indent + 1, pkk_get_fmt("sense_index").format(index=index))
+            pkk_print(pkk_get_fmt("sense_index").format(
+                index=index,
+                indent=pkk_geti(indent + 1)))
             pkk_output_sense(indent + 2, wnode)
             index += 1