comparison lxmldump.py @ 44:d7b4b2fb0214

Add support for hyphenation data.
author Matti Hamalainen <ccr@tnsp.org>
date Wed, 26 May 2021 13:25:27 +0300
parents 8ed576574712
children 23f00a0da841
comparison
equal deleted inserted replaced
43:8ed576574712 44:d7b4b2fb0214
46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>", 46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>",
47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>", 47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>",
48 }, 48 },
49 49
50 "word_fmt": { 50 "word_fmt": {
51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n", 51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}",
52 PKK_MODE_ANKI: "{word}{attr}\n", 52 PKK_MODE_ANKI: "{word}{attr}\n",
53 }, 53 },
54 "word_attr_list": { 54 "word_attr_list": {
55 PKK_MODE_NORMAL: " ({alist}) ", 55 PKK_MODE_NORMAL: " ({alist}) ",
56 }, 56 },
74 "search_list_item": { 74 "search_list_item": {
75 PKK_MODE_NORMAL: "\"{text}\"", 75 PKK_MODE_NORMAL: "\"{text}\"",
76 }, 76 },
77 "search_list_sep": { 77 "search_list_sep": {
78 PKK_MODE_NORMAL: ", ", 78 PKK_MODE_NORMAL: ", ",
79 },
80
81 "hyphenation": {
82 PKK_MODE_NORMAL: "{indent}hyph \"{text}\"\n",
83 },
84 "no_hyphenation": {
85 PKK_MODE_NORMAL: "",
79 }, 86 },
80 87
81 "sense_index": { 88 "sense_index": {
82 PKK_MODE_NORMAL: "{indent}sense #{index}\n", 89 PKK_MODE_NORMAL: "{indent}sense #{index}\n",
83 PKK_MODE_ANKI: "[{index}]:\n", 90 PKK_MODE_ANKI: "[{index}]:\n",
365 372
366 # Remove other duplicates and sort 373 # Remove other duplicates and sort
367 srchlist = list(set(srchlist)) 374 srchlist = list(set(srchlist))
368 srchlist.sort(reverse=False, key=lambda attr: (attr, len(attr))) 375 srchlist.sort(reverse=False, key=lambda attr: (attr, len(attr)))
369 376
377 # Get hyphenation note, if any
378 hnode = wnode.find("./Hyphenation")
379 if hnode != None:
380 hyphenation = pkk_get_fmt("hyphenation").format(
381 text=pkk_node_to_text(hnode),
382 indent=pkk_geti(indent + 1))
383 else:
384 hyphenation = pkk_get_fmt("no_hyphenation").format(
385 indent=pkk_geti(indent + 1))
386
370 # Create list with grammatical attributes (noun, verb, etc.) 387 # Create list with grammatical attributes (noun, verb, etc.)
371 attrlist = [] 388 attrlist = []
372 for pnode in wnode.findall("./PartOfSpeechCtn/PartOfSpeech"): 389 for pnode in wnode.findall("./PartOfSpeechCtn/PartOfSpeech"):
373 attrlist.append(pnode.attrib["freeValue"]) 390 attrlist.append(pnode.attrib["freeValue"])
374 391
382 # Print the headword and attributes if any 399 # Print the headword and attributes if any
383 pkk_print(pkk_get_fmt("word_fmt").format( 400 pkk_print(pkk_get_fmt("word_fmt").format(
384 word=headword, 401 word=headword,
385 attr=pkk_get_list_str(attrlist, "word_attr", True), 402 attr=pkk_get_list_str(attrlist, "word_attr", True),
386 search=pkk_get_list_str(srchlist, "search", True), 403 search=pkk_get_list_str(srchlist, "search", True),
404 hyphenation=hyphenation,
387 indent=pkk_geti(indent))) 405 indent=pkk_geti(indent)))
388 406
389 # Print main "sense" 407 # Print main "sense"
390 pkk_output_sense(indent + 1, wnode) 408 pkk_output_sense(indent + 1, wnode)
391 409