Mercurial > hg > lxmldump
changeset 18:ff959de0f6c8
Add grammatical attributes.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 11 May 2021 15:44:33 +0300 |
parents | 6fa24c711f86 |
children | 7c6eb57798bd |
files | lxmldump.py |
diffstat | 1 files changed, 22 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/lxmldump.py Tue May 11 15:12:14 2021 +0300
+++ b/lxmldump.py Tue May 11 15:44:33 2021 +0300
@@ -41,6 +41,7 @@
     "ahavakoittuo",
     "ahvaliha",
     "aloilleh",
+    "hanjahtoakseh",
 ]


@@ -177,9 +178,29 @@

 def pkk_output_node(indent, dnode):
     for wnode in dnode.findall("./HeadwordCtn"):
-        pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\":\n")
+        # Create list with grammatical attributes (noun, verb, etc.)
+        tmpl = []
+        for pnode in wnode.findall("./PartOfSpeechCtn/PartOfSpeech"):
+            tmpl.append(pnode.attrib["freeValue"])
+
+        for pnode in wnode.findall("./GrammaticalNote"):
+            tmpl.append(pnode.text.strip())
+
+        # Remove duplicates and sort the list
+        tmpl = list(set(tmpl))
+        tmpl.sort(reverse=False, key=lambda attr: (attr, len(attr)))
+
+        # Print the headword and attributes if any
+        pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\"")
+        if len(tmpl) > 0:
+            pkk_print(" ({})\n".format(" ; ".join(tmpl)))
+        else:
+            pkk_print("\n")
+
+        # Print main "sense"
         pkk_output_sense(indent + 1, wnode)

+    # Print any other "senses"
     index = 1
     for wnode in dnode.findall("./SenseGrp"):
         pkk_printi(indent + 1, "sense #{}\n".format(index))