comparison lxmldump.py @ 55:301452a71cc7

Make sense output formatting more list-like, rename some items.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 27 May 2021 10:49:44 +0300
parents 5d3fb2f3aa21
children 71e66eca1e23
comparison
equal deleted inserted replaced
51:5d3fb2f3aa21 55:301452a71cc7
45 "ptr_fmt": { 45 "ptr_fmt": {
46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>", 46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>",
47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>", 47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>",
48 }, 48 },
49 49
50 "word_fmt": { 50 "word_item": {
51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}{main_sense}{other_senses}\n", 51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}{main_sense}{other_senses}\n",
52 PKK_MODE_ANKI: "\"{word}\"{search}{attr}{hyphenation};{main_sense};{other_senses}\n", 52 PKK_MODE_ANKI: "\"{word}\"{search}{attr}{hyphenation};{main_sense};{other_senses}\n",
53 }, 53 },
54 "word_attr_list": { 54 "word_attr_list": {
55 PKK_MODE_NORMAL: " ({alist}) ", 55 PKK_MODE_NORMAL: " ({alist}) ",
87 }, 87 },
88 "no_hyphenation": { 88 "no_hyphenation": {
89 PKK_MODE_NORMAL: "", 89 PKK_MODE_NORMAL: "",
90 }, 90 },
91 91
92 "sense_index": { 92 "sense_list": {
93 PKK_MODE_NORMAL: "{indent}sense #{index}\n", 93 PKK_MODE_NORMAL: "{alist}",
94 PKK_MODE_ANKI: "#{index}: ", 94 },
95 }, 95 "sense_list_empty": {
96 96 PKK_MODE_NORMAL: "",
97 "definition_fmt": { 97 },
98 "sense_list_item": {
99 PKK_MODE_NORMAL: "{indent}sense #{index}\n{text}",
100 PKK_MODE_ANKI: "{text}",
101 },
102 "sense_list_sep": {
103 PKK_MODE_NORMAL: "",
104 PKK_MODE_ANKI: ";",
105 },
106
107 "definition_item": {
98 PKK_MODE_NORMAL: "{indent}defn \"{text}\"\n", 108 PKK_MODE_NORMAL: "{indent}defn \"{text}\"\n",
99 PKK_MODE_ANKI: " * \"{text}\"", 109 PKK_MODE_ANKI: " * \"{text}\"",
100 }, 110 },
101 111
102 "example_fmt": { 112 "example_item": {
103 PKK_MODE_NORMAL: "{indent}exmp \"{text}\"{geostr}\n", 113 PKK_MODE_NORMAL: "{indent}exmp \"{text}\"{geostr}\n",
104 PKK_MODE_ANKI: " ⚫ \"{text}\"{geostr}", 114 PKK_MODE_ANKI: " ⚫ \"{text}\"{geostr}",
105 }, 115 },
106 "example_geo_list": { 116 "example_geo_list": {
107 PKK_MODE_NORMAL: " ({alist})", 117 PKK_MODE_NORMAL: " ({alist})",
335 345
336 346
337 ## Output a main "Headword" or "Sense" node 347 ## Output a main "Headword" or "Sense" node
338 def pkk_get_sense(indent, dnode): 348 def pkk_get_sense(indent, dnode):
339 # Definition for this sense 349 # Definition for this sense
340 ostr = pkk_get_subs(indent, dnode, "./Definition", "definition_fmt") 350 ostr = pkk_get_subs(indent, dnode, "./Definition", "definition_item")
341 351
342 # Examples for this sense 352 # Examples for this sense
343 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): 353 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"):
344 geolist = [] 354 geolist = []
345 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"): 355 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"):
346 geolist.append(pkk_get_fmt("example_geo_list_item").format( 356 geolist.append(pkk_get_fmt("example_geo_list_item").format(
347 text=pkk_node_to_text(qnode), 357 text=pkk_node_to_text(qnode),
348 tclass=qnode.attrib["class"])) 358 tclass=qnode.attrib["class"]))
349 359
350 ostr += pkk_get_fmt("example_fmt").format( 360 ostr += pkk_get_fmt("example_item").format(
351 text=pkk_node_to_text(wnode.find("./Example")), 361 text=pkk_node_to_text(wnode.find("./Example")),
352 geostr=pkk_get_list_str(geolist, "example_geo", False), 362 geostr=pkk_get_list_str(geolist, "example_geo", False),
353 indent=pkk_geti(indent + 1)) 363 indent=pkk_geti(indent + 1))
354 364
355 return ostr 365 return ostr
400 # Get main "sense" 410 # Get main "sense"
401 msense = pkk_get_sense(indent + 1, wnode) 411 msense = pkk_get_sense(indent + 1, wnode)
402 412
403 # Print any other "senses" 413 # Print any other "senses"
404 index = 1 414 index = 1
405 osenses = "" 415 senselist = []
406 for znode in dnode.findall("./SenseGrp"): 416 for znode in dnode.findall("./SenseGrp"):
407 osenses += pkk_get_fmt("sense_index").format( 417 senselist.append(pkk_get_fmt("sense_list_item").format(
408 index=index, 418 index=index,
409 indent=pkk_geti(indent + 1)) 419 text=pkk_get_sense(indent + 2, znode),
410 osenses += pkk_get_sense(indent + 2, znode) 420 indent=pkk_geti(indent + 1)))
411 index += 1 421 index += 1
412 422
413 # Print the headword and attributes if any 423 # Print the headword and attributes if any
414 pkk_print(pkk_get_fmt("word_fmt").format( 424 pkk_print(pkk_get_fmt("word_item").format(
415 word=headword, 425 word=headword,
416 attr=pkk_get_list_str(attrlist, "word_attr", True), 426 attr=pkk_get_list_str(attrlist, "word_attr", True),
417 search=pkk_get_list_str(srchlist, "search", True), 427 search=pkk_get_list_str(srchlist, "search", True),
418 hyphenation=hyphenation, 428 hyphenation=hyphenation,
419 main_sense=msense, 429 main_sense=msense,
420 other_senses=osenses, 430 other_senses=pkk_get_list_str(senselist, "sense", False),
421 indent=pkk_geti(indent))) 431 indent=pkk_geti(indent)))
422 432
423 433
424 ### 434 ###
425 ### Main program starts 435 ### Main program starts