comparison lxmldump.py @ 48:6932c3f2bdeb

More flexibility.
author Matti Hamalainen <ccr@tnsp.org>
date Wed, 26 May 2021 22:41:29 +0300
parents 2e8282f1a837
children d3d4b547f86c
comparison
equal deleted inserted replaced
47:2e8282f1a837 48:6932c3f2bdeb
46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>", 46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>",
47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>", 47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>",
48 }, 48 },
49 49
50 "word_fmt": { 50 "word_fmt": {
51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}", 51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}{main_sense}{other_senses}\n",
52 PKK_MODE_ANKI: "{word}{search}{attr}{hyphenation}\n", 52 PKK_MODE_ANKI: "{word}{search}{attr}{hyphenation}{main_sense}{other_senses}\n",
53 }, 53 },
54 "word_attr_list": { 54 "word_attr_list": {
55 PKK_MODE_NORMAL: " ({alist}) ", 55 PKK_MODE_NORMAL: " ({alist}) ",
56 }, 56 },
57 "word_attr_list_empty": { 57 "word_attr_list_empty": {
110 "example_geo_list_item": { 110 "example_geo_list_item": {
111 PKK_MODE_NORMAL: "{text} [{tclass}]", 111 PKK_MODE_NORMAL: "{text} [{tclass}]",
112 }, 112 },
113 "example_geo_list_sep": { 113 "example_geo_list_sep": {
114 PKK_MODE_NORMAL: ", ", 114 PKK_MODE_NORMAL: ", ",
115 },
116
117 "word_end": {
118 PKK_MODE_NORMAL: "\n",
119 }, 115 },
120 } 116 }
121 117
122 118
123 # Element annotation mappings 119 # Element annotation mappings
313 for qnode in lnode.findall("./*"): 309 for qnode in lnode.findall("./*"):
314 pkk_dump_recursive(indent + 1, qnode) 310 pkk_dump_recursive(indent + 1, qnode)
315 311
316 312
317 ## Output item(s) under given node with given format string 313 ## Output item(s) under given node with given format string
318 def pkk_output_subs(indent, dnode, dsub, dfmtname): 314 def pkk_get_subs(indent, dnode, dsub, dfmtname):
319 dfmt = pkk_get_fmt(dfmtname) 315 dfmt = pkk_get_fmt(dfmtname)
316 ostr = ""
320 for qnode in dnode.findall(dsub): 317 for qnode in dnode.findall(dsub):
321 pkk_print(dfmt.format( 318 ostr += dfmt.format(
322 text=pkk_node_to_text(qnode), 319 text=pkk_node_to_text(qnode),
323 indent=pkk_geti(indent))) 320 indent=pkk_geti(indent))
321 return ostr
324 322
325 323
326 def pkk_get_list_str(dlist, dprefix, dfilter): 324 def pkk_get_list_str(dlist, dprefix, dfilter):
327 if len(dlist) > 0: 325 if len(dlist) > 0:
328 if dfilter: 326 if dfilter:
336 else: 334 else:
337 return pkk_get_fmt(dprefix + "_list_empty") 335 return pkk_get_fmt(dprefix + "_list_empty")
338 336
339 337
340 ## Output a main "Headword" or "Sense" node 338 ## Output a main "Headword" or "Sense" node
341 def pkk_output_sense(indent, dnode): 339 def pkk_get_sense(indent, dnode):
342 # Definition for this sense 340 # Definition for this sense
343 pkk_output_subs(indent, dnode, "./Definition", "definition_fmt") 341 ostr = pkk_get_subs(indent, dnode, "./Definition", "definition_fmt")
344 342
345 # Examples for this sense 343 # Examples for this sense
346 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): 344 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"):
347 geolist = [] 345 geolist = []
348 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"): 346 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"):
349 geolist.append(pkk_get_fmt("example_geo_list_item").format( 347 geolist.append(pkk_get_fmt("example_geo_list_item").format(
350 text=pkk_node_to_text(qnode), 348 text=pkk_node_to_text(qnode),
351 tclass=qnode.attrib["class"])) 349 tclass=qnode.attrib["class"]))
352 350
353 pkk_print(pkk_get_fmt("example_fmt").format( 351 ostr += pkk_get_fmt("example_fmt").format(
354 text=pkk_node_to_text(wnode.find("./Example")), 352 text=pkk_node_to_text(wnode.find("./Example")),
355 geostr=pkk_get_list_str(geolist, "example_geo", False), 353 geostr=pkk_get_list_str(geolist, "example_geo", False),
356 indent=pkk_geti(indent + 1))) 354 indent=pkk_geti(indent + 1))
355
356 return ostr
357 357
358 358
359 ## Output one "DictionaryEntry" node 359 ## Output one "DictionaryEntry" node
360 def pkk_output_node(indent, dnode): 360 def pkk_output_node(indent, dnode):
361 361
396 396
397 # Remove duplicates and sort the list 397 # Remove duplicates and sort the list
398 attrlist = list(set(attrlist)) 398 attrlist = list(set(attrlist))
399 attrlist.sort(reverse=False, key=lambda attr: (attr, len(attr))) 399 attrlist.sort(reverse=False, key=lambda attr: (attr, len(attr)))
400 400
401 # Get main "sense"
402 msense = pkk_get_sense(indent + 1, wnode)
403
404 # Print any other "senses"
405 index = 1
406 osenses = ""
407 for znode in dnode.findall("./SenseGrp"):
408 osenses += pkk_get_fmt("sense_index").format(
409 index=index,
410 indent=pkk_geti(indent + 1))
411 osenses += pkk_get_sense(indent + 2, znode)
412 index += 1
413
401 # Print the headword and attributes if any 414 # Print the headword and attributes if any
402 pkk_print(pkk_get_fmt("word_fmt").format( 415 pkk_print(pkk_get_fmt("word_fmt").format(
403 word=headword, 416 word=headword,
404 attr=pkk_get_list_str(attrlist, "word_attr", True), 417 attr=pkk_get_list_str(attrlist, "word_attr", True),
405 search=pkk_get_list_str(srchlist, "search", True), 418 search=pkk_get_list_str(srchlist, "search", True),
406 hyphenation=hyphenation, 419 hyphenation=hyphenation,
420 main_sense=msense,
421 other_senses=osenses,
407 indent=pkk_geti(indent))) 422 indent=pkk_geti(indent)))
408
409 # Print main "sense"
410 pkk_output_sense(indent + 1, wnode)
411
412 # Print any other "senses"
413 index = 1
414 for wnode in dnode.findall("./SenseGrp"):
415 pkk_print(pkk_get_fmt("sense_index").format(
416 index=index,
417 indent=pkk_geti(indent + 1)))
418 pkk_output_sense(indent + 2, wnode)
419 index += 1
420
421 pkk_print(pkk_get_fmt("word_end"))
422 423
423 424
424 ### 425 ###
425 ### Main program starts 426 ### Main program starts
426 ### 427 ###