Mercurial > hg > lxmldump
comparison lxmldump.py @ 55:301452a71cc7
Make sense output formatting more list-like, rename some items.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Thu, 27 May 2021 10:49:44 +0300 |
parents | 5d3fb2f3aa21 |
children | 71e66eca1e23 |
comparison
equal
deleted
inserted
replaced
51:5d3fb2f3aa21 | 55:301452a71cc7 |
---|---|
45 "ptr_fmt": { | 45 "ptr_fmt": { |
46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>", | 46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>", |
47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>", | 47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>", |
48 }, | 48 }, |
49 | 49 |
50 "word_fmt": { | 50 "word_item": { |
51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}{main_sense}{other_senses}\n", | 51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}{main_sense}{other_senses}\n", |
52 PKK_MODE_ANKI: "\"{word}\"{search}{attr}{hyphenation};{main_sense};{other_senses}\n", | 52 PKK_MODE_ANKI: "\"{word}\"{search}{attr}{hyphenation};{main_sense};{other_senses}\n", |
53 }, | 53 }, |
54 "word_attr_list": { | 54 "word_attr_list": { |
55 PKK_MODE_NORMAL: " ({alist}) ", | 55 PKK_MODE_NORMAL: " ({alist}) ", |
87 }, | 87 }, |
88 "no_hyphenation": { | 88 "no_hyphenation": { |
89 PKK_MODE_NORMAL: "", | 89 PKK_MODE_NORMAL: "", |
90 }, | 90 }, |
91 | 91 |
92 "sense_index": { | 92 "sense_list": { |
93 PKK_MODE_NORMAL: "{indent}sense #{index}\n", | 93 PKK_MODE_NORMAL: "{alist}", |
94 PKK_MODE_ANKI: "#{index}: ", | 94 }, |
95 }, | 95 "sense_list_empty": { |
96 | 96 PKK_MODE_NORMAL: "", |
97 "definition_fmt": { | 97 }, |
| | 98 "sense_list_item": { |
| | 99 PKK_MODE_NORMAL: "{indent}sense #{index}\n{text}", |
| | 100 PKK_MODE_ANKI: "{text}", |
| | 101 }, |
| | 102 "sense_list_sep": { |
| | 103 PKK_MODE_NORMAL: "", |
| | 104 PKK_MODE_NORMAL: ";", |
| | 105 }, |
| | 106 |
| | 107 "definition_item": { |
98 PKK_MODE_NORMAL: "{indent}defn \"{text}\"\n", | 108 PKK_MODE_NORMAL: "{indent}defn \"{text}\"\n", |
99 PKK_MODE_ANKI: " * \"{text}\"", | 109 PKK_MODE_ANKI: " * \"{text}\"", |
100 }, | 110 }, |
101 | 111 |
102 "example_fmt": { | 112 "example_item": { |
103 PKK_MODE_NORMAL: "{indent}exmp \"{text}\"{geostr}\n", | 113 PKK_MODE_NORMAL: "{indent}exmp \"{text}\"{geostr}\n", |
104 PKK_MODE_ANKI: " ⚫ \"{text}\"{geostr}", | 114 PKK_MODE_ANKI: " ⚫ \"{text}\"{geostr}", |
105 }, | 115 }, |
106 "example_geo_list": { | 116 "example_geo_list": { |
107 PKK_MODE_NORMAL: " ({alist})", | 117 PKK_MODE_NORMAL: " ({alist})", |
335 | 345 |
336 | 346 |
337 ## Output a main "Headword" or "Sense" node | 347 ## Output a main "Headword" or "Sense" node |
338 def pkk_get_sense(indent, dnode): | 348 def pkk_get_sense(indent, dnode): |
339 # Definition for this sense | 349 # Definition for this sense |
340 ostr = pkk_get_subs(indent, dnode, "./Definition", "definition_fmt") | 350 ostr = pkk_get_subs(indent, dnode, "./Definition", "definition_item") |
341 | 351 |
342 # Examples for this sense | 352 # Examples for this sense |
343 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): | 353 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): |
344 geolist = [] | 354 geolist = [] |
345 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"): | 355 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"): |
346 geolist.append(pkk_get_fmt("example_geo_list_item").format( | 356 geolist.append(pkk_get_fmt("example_geo_list_item").format( |
347 text=pkk_node_to_text(qnode), | 357 text=pkk_node_to_text(qnode), |
348 tclass=qnode.attrib["class"])) | 358 tclass=qnode.attrib["class"])) |
349 | 359 |
350 ostr += pkk_get_fmt("example_fmt").format( | 360 ostr += pkk_get_fmt("example_item").format( |
351 text=pkk_node_to_text(wnode.find("./Example")), | 361 text=pkk_node_to_text(wnode.find("./Example")), |
352 geostr=pkk_get_list_str(geolist, "example_geo", False), | 362 geostr=pkk_get_list_str(geolist, "example_geo", False), |
353 indent=pkk_geti(indent + 1)) | 363 indent=pkk_geti(indent + 1)) |
354 | 364 |
355 return ostr | 365 return ostr |
400 # Get main "sense" | 410 # Get main "sense" |
401 msense = pkk_get_sense(indent + 1, wnode) | 411 msense = pkk_get_sense(indent + 1, wnode) |
402 | 412 |
403 # Print any other "senses" | 413 # Print any other "senses" |
404 index = 1 | 414 index = 1 |
405 osenses = "" | 415 senselist = [] |
406 for znode in dnode.findall("./SenseGrp"): | 416 for znode in dnode.findall("./SenseGrp"): |
407 osenses += pkk_get_fmt("sense_index").format( | 417 senselist.append(pkk_get_fmt("sense_list_item").format( |
408 index=index, | 418 index=index, |
409 indent=pkk_geti(indent + 1)) | 419 text=pkk_get_sense(indent + 2, znode), |
410 osenses += pkk_get_sense(indent + 2, znode) | 420 indent=pkk_geti(indent + 1))) |
411 index += 1 | 421 index += 1 |
412 | 422 |
413 # Print the headword and attributes if any | 423 # Print the headword and attributes if any |
414 pkk_print(pkk_get_fmt("word_fmt").format( | 424 pkk_print(pkk_get_fmt("word_item").format( |
415 word=headword, | 425 word=headword, |
416 attr=pkk_get_list_str(attrlist, "word_attr", True), | 426 attr=pkk_get_list_str(attrlist, "word_attr", True), |
417 search=pkk_get_list_str(srchlist, "search", True), | 427 search=pkk_get_list_str(srchlist, "search", True), |
418 hyphenation=hyphenation, | 428 hyphenation=hyphenation, |
419 main_sense=msense, | 429 main_sense=msense, |
420 other_senses=osenses, | 430 other_senses=pkk_get_list_str(senselist, "sense", False), |
421 indent=pkk_geti(indent))) | 431 indent=pkk_geti(indent))) |
422 | 432 |
423 | 433 |
424 ### | 434 ### |
425 ### Main program starts | 435 ### Main program starts |