changeset 18:ff959de0f6c8

Add grammatical attributes.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 11 May 2021 15:44:33 +0300
parents 6fa24c711f86
children 7c6eb57798bd
files lxmldump.py
diffstat 1 files changed, 22 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/lxmldump.py	Tue May 11 15:12:14 2021 +0300
+++ b/lxmldump.py	Tue May 11 15:44:33 2021 +0300
@@ -41,6 +41,7 @@
     "ahavakoittuo",
     "ahvaliha",
     "aloilleh",
+    "hanjahtoakseh",
 ]
 
 
@@ -177,9 +178,29 @@
 def pkk_output_node(indent, dnode):
 
     for wnode in dnode.findall("./HeadwordCtn"):
-        pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\":\n")
+        # Create list with grammatical attributes (noun, verb, etc.)
+        tmpl = []
+        for pnode in wnode.findall("./PartOfSpeechCtn/PartOfSpeech"):
+            tmpl.append(pnode.attrib["freeValue"])
+
+        for pnode in wnode.findall("./GrammaticalNote"):
+            tmpl.append(pnode.text.strip())
+
+        # Remove duplicates and sort the list
+        tmpl = list(set(tmpl))
+        tmpl.sort(reverse=False, key=lambda attr: (attr, len(attr)))
+
+        # Print the headword and attributes if any
+        pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\"")
+        if len(tmpl) > 0:
+            pkk_print(" ({})\n".format(" ; ".join(tmpl)))
+        else:
+            pkk_print("\n")
+
+        # Print main "sense"
         pkk_output_sense(indent + 1, wnode)
 
+        # Print any other "senses"
         index = 1
         for wnode in dnode.findall("./SenseGrp"):
             pkk_printi(indent + 1, "sense #{}\n".format(index))