Mercurial > hg > lxmldump
comparison lxmldump.py @ 61:9c36574199f5
Enhancements to the output flexibility.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 01 Jun 2021 13:24:34 +0300 |
parents | cbed8ee15701 |
children | 1932f588743f |
comparison
equal
deleted
inserted
replaced
60:cbed8ee15701 | 61:9c36574199f5 |
---|---|
46 PKK_MODE_NORMAL: "<PTR:{href}>{text}</PTR>", | 46 PKK_MODE_NORMAL: "<PTR:{href}>{text}</PTR>", |
47 PKK_MODE_ANKI: "<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>", | 47 PKK_MODE_ANKI: "<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>", |
48 }, | 48 }, |
49 | 49 |
50 "word_item": { | 50 "word_item": { |
51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}{main_sense}{other_senses}\n", | 51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}{hyphenation}{main_sense}{other_senses}\n", |
52 PKK_MODE_ANKI: "\"{word}\"{search}{attr}{hyphenation};{main_sense}{other_senses}\n", | 52 PKK_MODE_ANKI: "\"{word}\"{search}{attr}{hyphenation};{main_sense}{other_senses}\n", |
53 }, | 53 }, |
54 "word_attr_list": { | 54 "word_attr_list": { |
55 PKK_MODE_NORMAL: " ({alist}) ", | 55 PKK_MODE_NORMAL: "{indent}attr \"({alist})\"\n", |
56 PKK_MODE_NORMAL: " ({alist})", | 56 PKK_MODE_ANKI: " ({alist})", |
57 }, | 57 }, |
58 "word_attr_list_empty": { | 58 "word_attr_list_empty": { |
59 PKK_MODE_NORMAL: " ", | 59 PKK_MODE_NORMAL: " ", |
60 PKK_MODE_ANKI: "", | 60 PKK_MODE_ANKI: "", |
61 }, | 61 }, |
65 "word_attr_list_sep": { | 65 "word_attr_list_sep": { |
66 PKK_MODE_NORMAL: " ; ", | 66 PKK_MODE_NORMAL: " ; ", |
67 PKK_MODE_ANKI: " : ", | 67 PKK_MODE_ANKI: " : ", |
68 }, | 68 }, |
69 | 69 |
70 "search_list": { | |
71 PKK_MODE_NORMAL: ", {alist}", | |
72 }, | |
73 "search_list_empty": { | |
74 PKK_MODE_NORMAL: "", | |
75 }, | |
76 "search_list_item": { | |
77 PKK_MODE_NORMAL: "\"{text}\"", | |
78 }, | |
79 "search_list_sep": { | |
80 PKK_MODE_NORMAL: ", ", | |
81 }, | |
82 | |
83 "hyphenation": { | 70 "hyphenation": { |
84 PKK_MODE_NORMAL: "{indent}hyph \"{text}\"\n", | 71 PKK_MODE_NORMAL: "{indent}hyph \"{text}\"\n", |
85 PKK_MODE_ANKI: " [hyph: {text}]", | 72 PKK_MODE_ANKI: " [hyph: {text}]", |
86 }, | 73 }, |
87 "no_hyphenation": { | 74 "no_hyphenation": { |
88 PKK_MODE_NORMAL: "", | 75 PKK_MODE_NORMAL: "", |
89 }, | 76 }, |
90 | 77 |
78 | |
79 "search_list": { | |
80 PKK_MODE_NORMAL: ", {alist}\n", | |
81 PKK_MODE_ANKI: ", {alist}", | |
82 }, | |
83 "search_list_empty": { | |
84 PKK_MODE_NORMAL: "", | |
85 }, | |
86 "search_list_item": { | |
87 PKK_MODE_NORMAL: "\"{text}\"", | |
88 }, | |
89 "search_list_sep": { | |
90 PKK_MODE_NORMAL: ", ", | |
91 }, | |
92 | |
93 "main_sense_item": { | |
94 PKK_MODE_NORMAL: "{definition}{example_list}", | |
95 }, | |
91 "sense_list": { | 96 "sense_list": { |
92 PKK_MODE_NORMAL: " | {alist}", | 97 PKK_MODE_NORMAL: "{alist}", |
98 PKK_MODE_ANKI: " | {alist}", | |
93 }, | 99 }, |
94 "sense_list_empty": { | 100 "sense_list_empty": { |
95 PKK_MODE_NORMAL: "", | 101 PKK_MODE_NORMAL: "", |
96 }, | 102 }, |
97 "sense_list_item": { | 103 "sense_list_item": { |
98 PKK_MODE_NORMAL: "{indent}sense #{index}\n{text}", | 104 PKK_MODE_NORMAL: "{indent}sense #{index}:\n{definition}{example_list}", |
99 PKK_MODE_ANKI: "#{index}:{text}", | 105 PKK_MODE_ANKI: "#{index}:{definition}{example_list}", |
100 }, | 106 }, |
101 "sense_list_sep": { | 107 "sense_list_sep": { |
102 PKK_MODE_NORMAL: "", | 108 PKK_MODE_NORMAL: "", |
103 PKK_MODE_NORMAL: " | ", | 109 PKK_MODE_NORMAL: " | ", |
104 }, | 110 }, |
110 | 116 |
111 "example_item": { | 117 "example_item": { |
112 PKK_MODE_NORMAL: "{indent}exmp \"{text}\"{geo_list}\n", | 118 PKK_MODE_NORMAL: "{indent}exmp \"{text}\"{geo_list}\n", |
113 PKK_MODE_ANKI: " * \"{text}\"{geo_list}", | 119 PKK_MODE_ANKI: " * \"{text}\"{geo_list}", |
114 }, | 120 }, |
121 "example_item_sep": { | |
122 PKK_MODE_NORMAL: "", | |
123 PKK_MODE_ANKI: "", | |
124 }, | |
125 | |
115 "example_geo_list": { | 126 "example_geo_list": { |
116 PKK_MODE_NORMAL: " ({alist})", | 127 PKK_MODE_NORMAL: " ({alist})", |
117 }, | 128 }, |
118 "example_geo_list_empty": { | 129 "example_geo_list_empty": { |
119 PKK_MODE_NORMAL: "", | 130 PKK_MODE_NORMAL: "", |
330 text=pkk_node_to_text(qnode), | 341 text=pkk_node_to_text(qnode), |
331 indent=pkk_geti(indent)) | 342 indent=pkk_geti(indent)) |
332 return ostr | 343 return ostr |
333 | 344 |
334 | 345 |
335 def pkk_get_list_str(dlist, dprefix, dfilter): | 346 def pkk_get_list_str(dindent, dlist, dprefix, dfilter): |
336 if len(dlist) > 0: | 347 if len(dlist) > 0: |
337 if dfilter: | 348 if dfilter: |
338 tfmt = pkk_get_fmt(dprefix + "_list_item") | 349 tfmt = pkk_get_fmt(dprefix + "_list_item") |
339 tlist = [tfmt.format(text=i) for i in dlist] | 350 tlist = [tfmt.format(text=i) for i in dlist] |
340 else: | 351 else: |
341 tlist = dlist | 352 tlist = dlist |
342 | 353 |
343 return pkk_get_fmt(dprefix + "_list").format( | 354 return pkk_get_fmt(dprefix + "_list").format( |
344 alist=pkk_get_fmt(dprefix + "_list_sep").join(tlist)) | 355 alist=pkk_get_fmt(dprefix + "_list_sep").join(tlist), |
356 indent=pkk_geti(dindent)) | |
345 else: | 357 else: |
346 return pkk_get_fmt(dprefix + "_list_empty") | 358 return pkk_get_fmt(dprefix + "_list_empty").format( |
347 | 359 indent=pkk_geti(dindent)) |
348 | 360 |
349 ## Output a main "Headword" or "Sense" node | 361 |
350 def pkk_get_sense(indent, dnode): | 362 ## Get definition and examples from node |
351 # Definition for this sense | 363 def pkk_get_sense(indent, dnode, dname, dindex): |
352 ostr = pkk_get_subs(indent, dnode, "./Definition", "definition_item") | 364 exlist = [] |
353 | 365 index = 1 |
354 # Examples for this sense | |
355 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): | 366 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): |
356 geolist = [] | 367 geolist = [] |
357 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"): | 368 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"): |
358 geolist.append(pkk_get_fmt("example_geo_list_item").format( | 369 geolist.append(pkk_get_fmt("example_geo_list_item").format( |
359 text=pkk_node_to_text(qnode), | 370 text=pkk_node_to_text(qnode), |
360 tclass=qnode.attrib["class"])) | 371 tclass=qnode.attrib["class"], |
361 | 372 indent=pkk_geti(indent + 2))) |
362 ostr += pkk_get_fmt("example_item").format( | 373 |
374 exlist.append(pkk_get_fmt("example_item").format( | |
363 text=pkk_node_to_text(wnode.find("./Example")), | 375 text=pkk_node_to_text(wnode.find("./Example")), |
364 geo_list=pkk_get_list_str(geolist, "example_geo", False), | 376 geo_list=pkk_get_list_str(indent + 1, geolist, "example_geo", False), |
365 indent=pkk_geti(indent + 1)) | 377 indent=pkk_geti(indent + 1), |
366 | 378 index=index)) |
367 return ostr | 379 index += 1 |
380 | |
381 return pkk_get_fmt(dname).format( | |
382 definition=pkk_get_subs(indent, dnode, "./Definition", "definition_item"), | |
383 example_list=pkk_get_fmt("example_item_sep").join(exlist), | |
384 indent=pkk_geti(indent), | |
385 index=dindex) | |
368 | 386 |
369 | 387 |
370 ## Output one "DictionaryEntry" node | 388 ## Output one "DictionaryEntry" node |
371 def pkk_output_node(indent, dnode): | 389 def pkk_output_node(indent, dnode): |
372 | 390 |
408 # Remove duplicates and sort the list | 426 # Remove duplicates and sort the list |
409 attrlist = list(set(attrlist)) | 427 attrlist = list(set(attrlist)) |
410 attrlist.sort(reverse=False, key=lambda attr: (attr, len(attr))) | 428 attrlist.sort(reverse=False, key=lambda attr: (attr, len(attr))) |
411 | 429 |
412 # Get main "sense" | 430 # Get main "sense" |
413 msense = pkk_get_sense(indent + 1, wnode) | 431 msense = pkk_get_sense(indent + 1, wnode, "main_sense_item", 0) |
414 | 432 |
415 # Print any other "senses" | 433 # Print any other "senses" |
416 index = 1 | 434 index = 1 |
417 senselist = [] | 435 senselist = [] |
418 for znode in dnode.findall("./SenseGrp"): | 436 for znode in dnode.findall("./SenseGrp"): |
419 senselist.append(pkk_get_fmt("sense_list_item").format( | 437 senselist.append(pkk_get_sense(indent + 1, znode, "sense_list_item", index)) |
420 index=index, | |
421 text=pkk_get_sense(indent + 2, znode), | |
422 indent=pkk_geti(indent + 1))) | |
423 index += 1 | 438 index += 1 |
424 | 439 |
425 # Print the headword and attributes if any | 440 # Print the headword and attributes if any |
426 pkk_print(pkk_get_fmt("word_item").format( | 441 pkk_print(pkk_get_fmt("word_item").format( |
427 word=headword, | 442 word=headword, |
428 attr=pkk_get_list_str(attrlist, "word_attr", True), | 443 attr=pkk_get_list_str(indent + 1, attrlist, "word_attr", True), |
429 search=pkk_get_list_str(srchlist, "search", True), | 444 search=pkk_get_list_str(indent + 1, srchlist, "search", True), |
430 hyphenation=hyphenation, | 445 hyphenation=hyphenation, |
431 main_sense=msense, | 446 main_sense=msense, |
432 other_senses=pkk_get_list_str(senselist, "sense", False), | 447 other_senses=pkk_get_list_str(indent + 1, senselist, "sense", False), |
433 indent=pkk_geti(indent))) | 448 indent=pkk_geti(indent))) |
434 | 449 |
435 | 450 |
436 ### | 451 ### |
437 ### Main program starts | 452 ### Main program starts |