# HG changeset patch # User Matti Hamalainen # Date 1622543074 -10800 # Node ID 9c36574199f5853da5babda2733d8ab8ee27d90f # Parent cbed8ee15701c330c474c5116298aefd10ebefa3 Enhancements to the output flexibility. diff -r cbed8ee15701 -r 9c36574199f5 lxmldump.py --- a/lxmldump.py Mon May 31 13:59:16 2021 +0300 +++ b/lxmldump.py Tue Jun 01 13:24:34 2021 +0300 @@ -48,12 +48,12 @@ }, "word_item": { - PKK_MODE_NORMAL: "\"{word}\"{search}{attr}\n{hyphenation}{main_sense}{other_senses}\n", + PKK_MODE_NORMAL: "\"{word}\"{search}{attr}{hyphenation}{main_sense}{other_senses}\n", PKK_MODE_ANKI: "\"{word}\"{search}{attr}{hyphenation};{main_sense}{other_senses}\n", }, "word_attr_list": { - PKK_MODE_NORMAL: " ({alist}) ", - PKK_MODE_NORMAL: " ({alist})", + PKK_MODE_NORMAL: "{indent}attr \"({alist})\"\n", + PKK_MODE_ANKI: " ({alist})", }, "word_attr_list_empty": { PKK_MODE_NORMAL: " ", @@ -67,8 +67,18 @@ PKK_MODE_ANKI: " : ", }, + "hyphenation": { + PKK_MODE_NORMAL: "{indent}hyph \"{text}\"\n", + PKK_MODE_ANKI: " [hyph: {text}]", + }, + "no_hyphenation": { + PKK_MODE_NORMAL: "", + }, + + "search_list": { - PKK_MODE_NORMAL: ", {alist}", + PKK_MODE_NORMAL: ", {alist}\n", + PKK_MODE_ANKI: ", {alist}", }, "search_list_empty": { PKK_MODE_NORMAL: "", @@ -80,23 +90,19 @@ PKK_MODE_NORMAL: ", ", }, - "hyphenation": { - PKK_MODE_NORMAL: "{indent}hyph \"{text}\"\n", - PKK_MODE_ANKI: " [hyph: {text}]", + "main_sense_item": { + PKK_MODE_NORMAL: "{definition}{example_list}", }, - "no_hyphenation": { - PKK_MODE_NORMAL: "", - }, - "sense_list": { - PKK_MODE_NORMAL: " | {alist}", + PKK_MODE_NORMAL: "{alist}", + PKK_MODE_ANKI: " | {alist}", }, "sense_list_empty": { PKK_MODE_NORMAL: "", }, "sense_list_item": { - PKK_MODE_NORMAL: "{indent}sense #{index}\n{text}", - PKK_MODE_ANKI: "#{index}:{text}", + PKK_MODE_NORMAL: "{indent}sense #{index}:\n{definition}{example_list}", + PKK_MODE_ANKI: "#{index}:{definition}{example_list}", }, "sense_list_sep": { PKK_MODE_NORMAL: "", @@ -112,6 +118,11 @@ PKK_MODE_NORMAL: "{indent}exmp \"{text}\"{geo_list}\n", PKK_MODE_ANKI: " * \"{text}\"{geo_list}", }, + "example_item_sep": { + PKK_MODE_NORMAL: "", + PKK_MODE_ANKI: "", + }, + "example_geo_list": { PKK_MODE_NORMAL: " ({alist})", }, @@ -332,7 +343,7 @@ return ostr -def pkk_get_list_str(dlist, dprefix, dfilter): +def pkk_get_list_str(dindent, dlist, dprefix, dfilter): if len(dlist) > 0: if dfilter: tfmt = pkk_get_fmt(dprefix + "_list_item") @@ -341,30 +352,37 @@ tlist = dlist return pkk_get_fmt(dprefix + "_list").format( - alist=pkk_get_fmt(dprefix + "_list_sep").join(tlist)) + alist=pkk_get_fmt(dprefix + "_list_sep").join(tlist), + indent=pkk_geti(dindent)) else: - return pkk_get_fmt(dprefix + "_list_empty") + return pkk_get_fmt(dprefix + "_list_empty").format( + indent=pkk_geti(dindent)) -## Output a main "Headword" or "Sense" node -def pkk_get_sense(indent, dnode): - # Definition for this sense - ostr = pkk_get_subs(indent, dnode, "./Definition", "definition_item") - - # Examples for this sense +## Get definition nand examples from node +def pkk_get_sense(indent, dnode, dname, dindex): + exlist = [] + index = 1 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): geolist = [] for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"): geolist.append(pkk_get_fmt("example_geo_list_item").format( text=pkk_node_to_text(qnode), - tclass=qnode.attrib["class"])) + tclass=qnode.attrib["class"], + indent=pkk_geti(indent + 2))) - ostr += pkk_get_fmt("example_item").format( + exlist.append(pkk_get_fmt("example_item").format( text=pkk_node_to_text(wnode.find("./Example")), - geo_list=pkk_get_list_str(geolist, "example_geo", False), - indent=pkk_geti(indent + 1)) + geo_list=pkk_get_list_str(indent + 1, geolist, "example_geo", False), + indent=pkk_geti(indent + 1), + index=index)) + index += 1 - return ostr + return pkk_get_fmt(dname).format( + definition=pkk_get_subs(indent, dnode, "./Definition", "definition_item"), + example_list=pkk_get_fmt("example_item_sep").join(exlist), + indent=pkk_geti(indent), + index=dindex) ## Output one "DictionaryEntry" node @@ -410,26 +428,23 @@ attrlist.sort(reverse=False, key=lambda attr: (attr, len(attr))) # Get main "sense" - msense = pkk_get_sense(indent + 1, wnode) + msense = pkk_get_sense(indent + 1, wnode, "main_sense_item", 0) # Print any other "senses" index = 1 senselist = [] for znode in dnode.findall("./SenseGrp"): - senselist.append(pkk_get_fmt("sense_list_item").format( - index=index, - text=pkk_get_sense(indent + 2, znode), - indent=pkk_geti(indent + 1))) + senselist.append(pkk_get_sense(indent + 1, znode, "sense_list_item", index)) index += 1 # Print the headword and attributes if any pkk_print(pkk_get_fmt("word_item").format( word=headword, - attr=pkk_get_list_str(attrlist, "word_attr", True), - search=pkk_get_list_str(srchlist, "search", True), + attr=pkk_get_list_str(indent + 1, attrlist, "word_attr", True), + search=pkk_get_list_str(indent + 1, srchlist, "search", True), hyphenation=hyphenation, main_sense=msense, - other_senses=pkk_get_list_str(senselist, "sense", False), + other_senses=pkk_get_list_str(indent + 1, senselist, "sense", False), indent=pkk_geti(indent)))