changeset 61:9c36574199f5

Enhancements to the output flexibility.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 01 Jun 2021 13:24:34 +0300
parents cbed8ee15701
children 1932f588743f
files lxmldump.py
diffstat 1 files changed, 51 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/lxmldump.py	Mon May 31 13:59:16 2021 +0300
+++ b/lxmldump.py	Tue Jun 01 13:24:34 2021 +0300
@@ -48,12 +48,12 @@
     },
 
     "word_item": {
-        PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}{main_sense}{other_senses}\n",
+        PKK_MODE_NORMAL: "\"{word}\"{search}{attr}{hyphenation}{main_sense}{other_senses}\n",
         PKK_MODE_ANKI: "\"{word}\"{search}{attr}{hyphenation};{main_sense}{other_senses}\n",
     },
     "word_attr_list": {
-        PKK_MODE_NORMAL: " ({alist}) ",
-        PKK_MODE_NORMAL: " ({alist})",
+        PKK_MODE_NORMAL: "{indent}attr \"({alist})\"\n",
+        PKK_MODE_ANKI: " ({alist})",
     },
     "word_attr_list_empty": {
         PKK_MODE_NORMAL: " ",
@@ -67,8 +67,18 @@
         PKK_MODE_ANKI: " : ",
     },
 
+    "hyphenation": {
+        PKK_MODE_NORMAL: "{indent}hyph \"{text}\"\n",
+        PKK_MODE_ANKI: " [hyph: {text}]",
+    },
+    "no_hyphenation": {
+        PKK_MODE_NORMAL: "",
+    },
+
+
     "search_list": {
-        PKK_MODE_NORMAL: ", {alist}",
+        PKK_MODE_NORMAL: ", {alist}\n",
+        PKK_MODE_ANKI: ", {alist}",
     },
     "search_list_empty": {
         PKK_MODE_NORMAL: "",
@@ -80,23 +90,19 @@
         PKK_MODE_NORMAL: ", ",
     },
 
-    "hyphenation": {
-        PKK_MODE_NORMAL: "{indent}hyph \"{text}\"\n",
-        PKK_MODE_ANKI: " [hyph: {text}]",
+    "main_sense_item": {
+        PKK_MODE_NORMAL: "{definition}{example_list}",
     },
-    "no_hyphenation": {
-        PKK_MODE_NORMAL: "",
-    },
-
     "sense_list": {
-        PKK_MODE_NORMAL: " | {alist}",
+        PKK_MODE_NORMAL: "{alist}",
+        PKK_MODE_ANKI: " | {alist}",
     },
     "sense_list_empty": {
         PKK_MODE_NORMAL: "",
     },
     "sense_list_item": {
-        PKK_MODE_NORMAL: "{indent}sense #{index}\n{text}",
-        PKK_MODE_ANKI: "#{index}:{text}",
+        PKK_MODE_NORMAL: "{indent}sense #{index}:\n{definition}{example_list}",
+        PKK_MODE_ANKI: "#{index}:{definition}{example_list}",
     },
     "sense_list_sep": {
         PKK_MODE_NORMAL: "",
@@ -112,6 +118,11 @@
         PKK_MODE_NORMAL: "{indent}exmp \"{text}\"{geo_list}\n",
         PKK_MODE_ANKI: " * \"{text}\"{geo_list}",
     },
+    "example_item_sep": {
+        PKK_MODE_NORMAL: "",
+        PKK_MODE_ANKI: "",
+    },
+
     "example_geo_list": {
         PKK_MODE_NORMAL: " ({alist})",
     },
@@ -332,7 +343,7 @@
     return ostr
 
 
-def pkk_get_list_str(dlist, dprefix, dfilter):
+def pkk_get_list_str(dindent, dlist, dprefix, dfilter):
     if len(dlist) > 0:
         if dfilter:
             tfmt = pkk_get_fmt(dprefix + "_list_item")
@@ -341,30 +352,37 @@
             tlist = dlist
 
         return pkk_get_fmt(dprefix + "_list").format(
-            alist=pkk_get_fmt(dprefix + "_list_sep").join(tlist))
+            alist=pkk_get_fmt(dprefix + "_list_sep").join(tlist),
+            indent=pkk_geti(dindent))
     else:
-        return pkk_get_fmt(dprefix + "_list_empty")
+        return pkk_get_fmt(dprefix + "_list_empty").format(
+            indent=pkk_geti(dindent))
 
 
-## Output a main "Headword" or "Sense" node
-def pkk_get_sense(indent, dnode):
-    # Definition for this sense
-    ostr = pkk_get_subs(indent, dnode, "./Definition", "definition_item")
-
-    # Examples for this sense
+## Get definition and examples from node
+def pkk_get_sense(indent, dnode, dname, dindex):
+    exlist = []
+    index = 1
     for wnode in dnode.findall("./ExampleBlock/ExampleCtn"):
         geolist = []
         for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"):
             geolist.append(pkk_get_fmt("example_geo_list_item").format(
                 text=pkk_node_to_text(qnode),
-                tclass=qnode.attrib["class"]))
+                tclass=qnode.attrib["class"],
+                indent=pkk_geti(indent + 2)))
 
-        ostr += pkk_get_fmt("example_item").format(
+        exlist.append(pkk_get_fmt("example_item").format(
             text=pkk_node_to_text(wnode.find("./Example")),
-            geo_list=pkk_get_list_str(geolist, "example_geo", False),
-            indent=pkk_geti(indent + 1))
+            geo_list=pkk_get_list_str(indent + 1, geolist, "example_geo", False),
+            indent=pkk_geti(indent + 1),
+            index=index))
+        index += 1
 
-    return ostr
+    return pkk_get_fmt(dname).format(
+        definition=pkk_get_subs(indent, dnode, "./Definition", "definition_item"),
+        example_list=pkk_get_fmt("example_item_sep").join(exlist),
+        indent=pkk_geti(indent),
+        index=dindex)
 
 
 ## Output one "DictionaryEntry" node
@@ -410,26 +428,23 @@
         attrlist.sort(reverse=False, key=lambda attr: (attr, len(attr)))
 
         # Get main "sense"
-        msense = pkk_get_sense(indent + 1, wnode)
+        msense = pkk_get_sense(indent + 1, wnode, "main_sense_item", 0)
 
         # Print any other "senses"
         index = 1
         senselist = []
         for znode in dnode.findall("./SenseGrp"):
-            senselist.append(pkk_get_fmt("sense_list_item").format(
-                index=index,
-                text=pkk_get_sense(indent + 2, znode),
-                indent=pkk_geti(indent + 1)))
+            senselist.append(pkk_get_sense(indent + 1, znode, "sense_list_item", index))
             index += 1
 
         # Print the headword and attributes if any
         pkk_print(pkk_get_fmt("word_item").format(
             word=headword,
-            attr=pkk_get_list_str(attrlist, "word_attr", True),
-            search=pkk_get_list_str(srchlist, "search", True),
+            attr=pkk_get_list_str(indent + 1, attrlist, "word_attr", True),
+            search=pkk_get_list_str(indent + 1, srchlist, "search", True),
             hyphenation=hyphenation,
             main_sense=msense,
-            other_senses=pkk_get_list_str(senselist, "sense", False),
+            other_senses=pkk_get_list_str(indent + 1, senselist, "sense", False),
             indent=pkk_geti(indent)))