Mercurial > hg > lxmldump
comparison lxmldump.py @ 44:d7b4b2fb0214
Add support for hyphenation data.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Wed, 26 May 2021 13:25:27 +0300 |
parents | 8ed576574712 |
children | 23f00a0da841 |
comparison
equal
deleted
inserted
replaced
43:8ed576574712 | 44:d7b4b2fb0214 |
---|---|
46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>", | 46 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>", |
47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>", | 47 PKK_MODE_ANKI: u"<a href='https://kaino.kotus.fi/cgi-bin/kks/karjala.cgi?a={href}'>{text}</a>", |
48 }, | 48 }, |
49 | 49 |
50 "word_fmt": { | 50 "word_fmt": { |
51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n", | 51 PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}", |
52 PKK_MODE_ANKI: "{word}{attr}\n", | 52 PKK_MODE_ANKI: "{word}{attr}\n", |
53 }, | 53 }, |
54 "word_attr_list": { | 54 "word_attr_list": { |
55 PKK_MODE_NORMAL: " ({alist}) ", | 55 PKK_MODE_NORMAL: " ({alist}) ", |
56 }, | 56 }, |
74 "search_list_item": { | 74 "search_list_item": { |
75 PKK_MODE_NORMAL: "\"{text}\"", | 75 PKK_MODE_NORMAL: "\"{text}\"", |
76 }, | 76 }, |
77 "search_list_sep": { | 77 "search_list_sep": { |
78 PKK_MODE_NORMAL: ", ", | 78 PKK_MODE_NORMAL: ", ", |
79 }, | |
80 | |
81 "hyphenation": { | |
82 PKK_MODE_NORMAL: "{indent}hyph \"{text}\"\n", | |
83 }, | |
84 "no_hyphenation": { | |
85 PKK_MODE_NORMAL: "", | |
79 }, | 86 }, |
80 | 87 |
81 "sense_index": { | 88 "sense_index": { |
82 PKK_MODE_NORMAL: "{indent}sense #{index}\n", | 89 PKK_MODE_NORMAL: "{indent}sense #{index}\n", |
83 PKK_MODE_ANKI: "[{index}]:\n", | 90 PKK_MODE_ANKI: "[{index}]:\n", |
365 | 372 |
366 # Remove other duplicates and sort | 373 # Remove other duplicates and sort |
367 srchlist = list(set(srchlist)) | 374 srchlist = list(set(srchlist)) |
368 srchlist.sort(reverse=False, key=lambda attr: (attr, len(attr))) | 375 srchlist.sort(reverse=False, key=lambda attr: (attr, len(attr))) |
369 | 376 |
377 # Get hyphenation note, if any | |
378 hnode = wnode.find("./Hyphenation") | |
379 if hnode != None: | |
380 hyphenation = pkk_get_fmt("hyphenation").format( | |
381 text=pkk_node_to_text(hnode), | |
382 indent=pkk_geti(indent + 1)) | |
383 else: | |
384 hyphenation = pkk_get_fmt("no_hyphenation").format( | |
385 indent=pkk_geti(indent + 1)) | |
386 | |
370 # Create list with grammatical attributes (noun, verb, etc.) | 387 # Create list with grammatical attributes (noun, verb, etc.) |
371 attrlist = [] | 388 attrlist = [] |
372 for pnode in wnode.findall("./PartOfSpeechCtn/PartOfSpeech"): | 389 for pnode in wnode.findall("./PartOfSpeechCtn/PartOfSpeech"): |
373 attrlist.append(pnode.attrib["freeValue"]) | 390 attrlist.append(pnode.attrib["freeValue"]) |
374 | 391 |
382 # Print the headword and attributes if any | 399 # Print the headword and attributes if any |
383 pkk_print(pkk_get_fmt("word_fmt").format( | 400 pkk_print(pkk_get_fmt("word_fmt").format( |
384 word=headword, | 401 word=headword, |
385 attr=pkk_get_list_str(attrlist, "word_attr", True), | 402 attr=pkk_get_list_str(attrlist, "word_attr", True), |
386 search=pkk_get_list_str(srchlist, "search", True), | 403 search=pkk_get_list_str(srchlist, "search", True), |
404 hyphenation=hyphenation, | |
387 indent=pkk_geti(indent))) | 405 indent=pkk_geti(indent))) |
388 | 406 |
389 # Print main "sense" | 407 # Print main "sense" |
390 pkk_output_sense(indent + 1, wnode) | 408 pkk_output_sense(indent + 1, wnode) |
391 | 409 |