Mercurial > hg > lxmldump
comparison lxmldump.py @ 18:ff959de0f6c8
Add grammatical attributes.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 11 May 2021 15:44:33 +0300 |
parents | 6fa24c711f86 |
children | 7c6eb57798bd |
comparison
equal
deleted
inserted
replaced
17:6fa24c711f86 | 18:ff959de0f6c8 |
---|---|
39 "ahavakkaine", | 39 "ahavakkaine", |
40 "ahavakala", | 40 "ahavakala", |
41 "ahavakoittuo", | 41 "ahavakoittuo", |
42 "ahvaliha", | 42 "ahvaliha", |
43 "aloilleh", | 43 "aloilleh", |
44 "hanjahtoakseh", | |
44 ] | 45 ] |
45 | 46 |
46 | 47 |
47 ### | 48 ### |
48 ### Misc. helper functions, etc | 49 ### Misc. helper functions, etc |
175 | 176 |
176 ## Output one "DictionaryEntry" node | 177 ## Output one "DictionaryEntry" node |
177 def pkk_output_node(indent, dnode): | 178 def pkk_output_node(indent, dnode): |
178 | 179 |
179 for wnode in dnode.findall("./HeadwordCtn"): | 180 for wnode in dnode.findall("./HeadwordCtn"): |
180 pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\":\n") | 181 # Create list with grammatical attributes (noun, verb, etc.) |
182 tmpl = [] | |
183 for pnode in wnode.findall("./PartOfSpeechCtn/PartOfSpeech"): | |
184 tmpl.append(pnode.attrib["freeValue"]) | |
185 | |
186 for pnode in wnode.findall("./GrammaticalNote"): | |
187 tmpl.append(pnode.text.strip()) | |
188 | |
189 # Remove duplicates and sort the list | |
190 tmpl = list(set(tmpl)) | |
191 tmpl.sort(reverse=False, key=lambda attr: (attr, len(attr))) | |
192 | |
193 # Print the headword and attributes if any | |
194 pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\"") | |
195 if len(tmpl) > 0: | |
196 pkk_print(" ({})\n".format(" ; ".join(tmpl))) | |
197 else: | |
198 pkk_print("\n") | |
199 | |
200 # Print main "sense" | |
181 pkk_output_sense(indent + 1, wnode) | 201 pkk_output_sense(indent + 1, wnode) |
182 | 202 |
203 # Print any other "senses" | |
183 index = 1 | 204 index = 1 |
184 for wnode in dnode.findall("./SenseGrp"): | 205 for wnode in dnode.findall("./SenseGrp"): |
185 pkk_printi(indent + 1, "sense #{}\n".format(index)) | 206 pkk_printi(indent + 1, "sense #{}\n".format(index)) |
186 pkk_output_sense(indent + 2, wnode) | 207 pkk_output_sense(indent + 2, wnode) |
187 index += 1 | 208 index += 1 |