Mercurial > hg > lxmldump
changeset 44:d7b4b2fb0214
Add support for hyphenation data.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Wed, 26 May 2021 13:25:27 +0300 |
parents | 8ed576574712 |
children | 23f00a0da841 |
files | lxmldump.py |
diffstat | 1 files changed, 19 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/lxmldump.py Wed May 26 13:12:02 2021 +0300
+++ b/lxmldump.py Wed May 26 13:25:27 2021 +0300
@@ -48,7 +48,7 @@
     },
 
     "word_fmt": {
-        PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n",
+        PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}",
         PKK_MODE_ANKI: "{word}{attr}\n",
     },
     "word_attr_list": {
@@ -78,6 +78,13 @@
         PKK_MODE_NORMAL: ", ",
     },
 
+    "hyphenation": {
+        PKK_MODE_NORMAL: "{indent}hyph \"{text}\"\n",
+    },
+    "no_hyphenation": {
+        PKK_MODE_NORMAL: "",
+    },
+
     "sense_index": {
         PKK_MODE_NORMAL: "{indent}sense #{index}\n",
         PKK_MODE_ANKI: "[{index}]:\n",
@@ -367,6 +374,16 @@
     srchlist = list(set(srchlist))
     srchlist.sort(reverse=False, key=lambda attr: (attr, len(attr)))
 
+    # Get hyphenation note, if any
+    hnode = wnode.find("./Hyphenation")
+    if hnode != None:
+        hyphenation = pkk_get_fmt("hyphenation").format(
+            text=pkk_node_to_text(hnode),
+            indent=pkk_geti(indent + 1))
+    else:
+        hyphenation = pkk_get_fmt("no_hyphenation").format(
+            indent=pkk_geti(indent + 1))
+
     # Create list with grammatical attributes (noun, verb, etc.)
     attrlist = []
     for pnode in wnode.findall("./PartOfSpeechCtn/PartOfSpeech"):
@@ -384,6 +401,7 @@
         word=headword,
         attr=pkk_get_list_str(attrlist, "word_attr", True),
         search=pkk_get_list_str(srchlist, "search", True),
+        hyphenation=hyphenation,
         indent=pkk_geti(indent)))
 
     # Print main "sense"
```