comparison lxmldump.py @ 18:ff959de0f6c8

Add grammatical attributes.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 11 May 2021 15:44:33 +0300
parents 6fa24c711f86
children 7c6eb57798bd
comparison
equal deleted inserted replaced
17:6fa24c711f86 18:ff959de0f6c8
39 "ahavakkaine", 39 "ahavakkaine",
40 "ahavakala", 40 "ahavakala",
41 "ahavakoittuo", 41 "ahavakoittuo",
42 "ahvaliha", 42 "ahvaliha",
43 "aloilleh", 43 "aloilleh",
44 "hanjahtoakseh",
44 ] 45 ]
45 46
46 47
47 ### 48 ###
48 ### Misc. helper functions, etc 49 ### Misc. helper functions, etc
175 176
176 ## Output one "DictionaryEntry" node 177 ## Output one "DictionaryEntry" node
177 def pkk_output_node(indent, dnode): 178 def pkk_output_node(indent, dnode):
178 179
179 for wnode in dnode.findall("./HeadwordCtn"): 180 for wnode in dnode.findall("./HeadwordCtn"):
180 pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\":\n") 181 # Create list with grammatical attributes (noun, verb, etc.)
182 tmpl = []
183 for pnode in wnode.findall("./PartOfSpeechCtn/PartOfSpeech"):
184 tmpl.append(pnode.attrib["freeValue"])
185
186 for pnode in wnode.findall("./GrammaticalNote"):
187 tmpl.append(pnode.text.strip())
188
189 # Remove duplicates and sort the list
190 tmpl = list(set(tmpl))
191 tmpl.sort(reverse=False, key=lambda attr: (attr, len(attr)))
192
193 # Print the headword and attributes if any
194 pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\"")
195 if len(tmpl) > 0:
196 pkk_print(" ({})\n".format(" ; ".join(tmpl)))
197 else:
198 pkk_print("\n")
199
200 # Print main "sense"
181 pkk_output_sense(indent + 1, wnode) 201 pkk_output_sense(indent + 1, wnode)
182 202
203 # Print any other "senses"
183 index = 1 204 index = 1
184 for wnode in dnode.findall("./SenseGrp"): 205 for wnode in dnode.findall("./SenseGrp"):
185 pkk_printi(indent + 1, "sense #{}\n".format(index)) 206 pkk_printi(indent + 1, "sense #{}\n".format(index))
186 pkk_output_sense(indent + 2, wnode) 207 pkk_output_sense(indent + 2, wnode)
187 index += 1 208 index += 1