# HG changeset patch
# User Matti Hamalainen
# Date 1620737073 -10800
# Node ID ff959de0f6c8debe21788804ac2a1ca26da3ee42
# Parent 6fa24c711f867149598b2ab925859f4aa83beeb9
Add grammatical attributes.

diff -r 6fa24c711f86 -r ff959de0f6c8 lxmldump.py
--- a/lxmldump.py Tue May 11 15:12:14 2021 +0300
+++ b/lxmldump.py Tue May 11 15:44:33 2021 +0300
@@ -41,6 +41,7 @@
     "ahavakoittuo",
     "ahvaliha",
     "aloilleh",
+    "hanjahtoakseh",
 ]
 
 
@@ -177,9 +178,29 @@
 
 def pkk_output_node(indent, dnode):
     for wnode in dnode.findall("./HeadwordCtn"):
-        pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\":\n")
+        # Create list with grammatical attributes (noun, verb, etc.)
+        tmpl = []
+        for pnode in wnode.findall("./PartOfSpeechCtn/PartOfSpeech"):
+            tmpl.append(pnode.attrib["freeValue"])
+
+        for pnode in wnode.findall("./GrammaticalNote"):
+            tmpl.append(pnode.text.strip())
+
+        # Remove duplicates and sort the list
+        tmpl = list(set(tmpl))
+        tmpl.sort(reverse=False, key=lambda attr: (attr, len(attr)))
+
+        # Print the headword and attributes if any
+        pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\"")
+        if len(tmpl) > 0:
+            pkk_print(" ({})\n".format(" ; ".join(tmpl)))
+        else:
+            pkk_print("\n")
+
+        # Print main "sense"
         pkk_output_sense(indent + 1, wnode)
 
+    # Print any other "senses"
     index = 1
     for wnode in dnode.findall("./SenseGrp"):
         pkk_printi(indent + 1, "sense #{}\n".format(index))