Mercurial > hg > lxmldump
comparison lxmldump.py @ 48:6932c3f2bdeb
More flexibility.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Wed, 26 May 2021 22:41:29 +0300 |
parents | 2e8282f1a837 |
children | d3d4b547f86c |
comparison
equal
deleted
inserted
replaced
47:2e8282f1a837 | 48:6932c3f2bdeb |
---|---|
46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>", | 46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>", |
47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>", | 47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>", |
48 }, | 48 }, |
49 | 49 |
50 "word_fmt": { | 50 "word_fmt": { |
51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}", | 51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}{main_sense}{other_senses}\n", |
52 PKK_MODE_ANKI: "{word}{search}{attr}{hyphenation}\n", | 52 PKK_MODE_ANKI: "{word}{search}{attr}{hyphenation}{main_sense}{other_senses}\n", |
53 }, | 53 }, |
54 "word_attr_list": { | 54 "word_attr_list": { |
55 PKK_MODE_NORMAL: " ({alist}) ", | 55 PKK_MODE_NORMAL: " ({alist}) ", |
56 }, | 56 }, |
57 "word_attr_list_empty": { | 57 "word_attr_list_empty": { |
110 "example_geo_list_item": { | 110 "example_geo_list_item": { |
111 PKK_MODE_NORMAL: "{text} [{tclass}]", | 111 PKK_MODE_NORMAL: "{text} [{tclass}]", |
112 }, | 112 }, |
113 "example_geo_list_sep": { | 113 "example_geo_list_sep": { |
114 PKK_MODE_NORMAL: ", ", | 114 PKK_MODE_NORMAL: ", ", |
115 }, | |
116 | |
117 "word_end": { | |
118 PKK_MODE_NORMAL: "\n", | |
119 }, | 115 }, |
120 } | 116 } |
121 | 117 |
122 | 118 |
123 # Element annotation mappings | 119 # Element annotation mappings |
313 for qnode in lnode.findall("./*"): | 309 for qnode in lnode.findall("./*"): |
314 pkk_dump_recursive(indent + 1, qnode) | 310 pkk_dump_recursive(indent + 1, qnode) |
315 | 311 |
316 | 312 |
317 ## Output item(s) under given node with given format string | 313 ## Output item(s) under given node with given format string |
318 def pkk_output_subs(indent, dnode, dsub, dfmtname): | 314 def pkk_get_subs(indent, dnode, dsub, dfmtname): |
319 dfmt = pkk_get_fmt(dfmtname) | 315 dfmt = pkk_get_fmt(dfmtname) |
316 ostr = "" | |
320 for qnode in dnode.findall(dsub): | 317 for qnode in dnode.findall(dsub): |
321 pkk_print(dfmt.format( | 318 ostr += dfmt.format( |
322 text=pkk_node_to_text(qnode), | 319 text=pkk_node_to_text(qnode), |
323 indent=pkk_geti(indent))) | 320 indent=pkk_geti(indent)) |
321 return ostr | |
324 | 322 |
325 | 323 |
326 def pkk_get_list_str(dlist, dprefix, dfilter): | 324 def pkk_get_list_str(dlist, dprefix, dfilter): |
327 if len(dlist) > 0: | 325 if len(dlist) > 0: |
328 if dfilter: | 326 if dfilter: |
336 else: | 334 else: |
337 return pkk_get_fmt(dprefix + "_list_empty") | 335 return pkk_get_fmt(dprefix + "_list_empty") |
338 | 336 |
339 | 337 |
340 ## Output a main "Headword" or "Sense" node | 338 ## Output a main "Headword" or "Sense" node |
341 def pkk_output_sense(indent, dnode): | 339 def pkk_get_sense(indent, dnode): |
342 # Definition for this sense | 340 # Definition for this sense |
343 pkk_output_subs(indent, dnode, "./Definition", "definition_fmt") | 341 ostr = pkk_get_subs(indent, dnode, "./Definition", "definition_fmt") |
344 | 342 |
345 # Examples for this sense | 343 # Examples for this sense |
346 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): | 344 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): |
347 geolist = [] | 345 geolist = [] |
348 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"): | 346 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"): |
349 geolist.append(pkk_get_fmt("example_geo_list_item").format( | 347 geolist.append(pkk_get_fmt("example_geo_list_item").format( |
350 text=pkk_node_to_text(qnode), | 348 text=pkk_node_to_text(qnode), |
351 tclass=qnode.attrib["class"])) | 349 tclass=qnode.attrib["class"])) |
352 | 350 |
353 pkk_print(pkk_get_fmt("example_fmt").format( | 351 ostr += pkk_get_fmt("example_fmt").format( |
354 text=pkk_node_to_text(wnode.find("./Example")), | 352 text=pkk_node_to_text(wnode.find("./Example")), |
355 geostr=pkk_get_list_str(geolist, "example_geo", False), | 353 geostr=pkk_get_list_str(geolist, "example_geo", False), |
356 indent=pkk_geti(indent + 1))) | 354 indent=pkk_geti(indent + 1)) |
355 | |
356 return ostr | |
357 | 357 |
358 | 358 |
359 ## Output one "DictionaryEntry" node | 359 ## Output one "DictionaryEntry" node |
360 def pkk_output_node(indent, dnode): | 360 def pkk_output_node(indent, dnode): |
361 | 361 |
396 | 396 |
397 # Remove duplicates and sort the list | 397 # Remove duplicates and sort the list |
398 attrlist = list(set(attrlist)) | 398 attrlist = list(set(attrlist)) |
399 attrlist.sort(reverse=False, key=lambda attr: (attr, len(attr))) | 399 attrlist.sort(reverse=False, key=lambda attr: (attr, len(attr))) |
400 | 400 |
401 # Get main "sense" | |
402 msense = pkk_get_sense(indent + 1, wnode) | |
403 | |
404 # Print any other "senses" | |
405 index = 1 | |
406 osenses = "" | |
407 for znode in dnode.findall("./SenseGrp"): | |
408 osenses += pkk_get_fmt("sense_index").format( | |
409 index=index, | |
410 indent=pkk_geti(indent + 1)) | |
411 osenses += pkk_get_sense(indent + 2, znode) | |
412 index += 1 | |
413 | |
401 # Print the headword and attributes if any | 414 # Print the headword and attributes if any |
402 pkk_print(pkk_get_fmt("word_fmt").format( | 415 pkk_print(pkk_get_fmt("word_fmt").format( |
403 word=headword, | 416 word=headword, |
404 attr=pkk_get_list_str(attrlist, "word_attr", True), | 417 attr=pkk_get_list_str(attrlist, "word_attr", True), |
405 search=pkk_get_list_str(srchlist, "search", True), | 418 search=pkk_get_list_str(srchlist, "search", True), |
406 hyphenation=hyphenation, | 419 hyphenation=hyphenation, |
420 main_sense=msense, | |
421 other_senses=osenses, | |
407 indent=pkk_geti(indent))) | 422 indent=pkk_geti(indent))) |
408 | |
409 # Print main "sense" | |
410 pkk_output_sense(indent + 1, wnode) | |
411 | |
412 # Print any other "senses" | |
413 index = 1 | |
414 for wnode in dnode.findall("./SenseGrp"): | |
415 pkk_print(pkk_get_fmt("sense_index").format( | |
416 index=index, | |
417 indent=pkk_geti(indent + 1))) | |
418 pkk_output_sense(indent + 2, wnode) | |
419 index += 1 | |
420 | |
421 pkk_print(pkk_get_fmt("word_end")) | |
422 | 423 |
423 | 424 |
424 ### | 425 ### |
425 ### Main program starts | 426 ### Main program starts |
426 ### | 427 ### |