# HG changeset patch # User Matti Hamalainen # Date 1621983775 -10800 # Node ID bc8d8ef4a248f9c7c35d4d542e5f9fa091e02a1b # Parent f53ea742b57ffd81c6b7dda1b08afe8517dd04c6 More work on output flexibility. diff -r f53ea742b57f -r bc8d8ef4a248 lxmldump.py --- a/lxmldump.py Wed May 26 01:07:04 2021 +0300 +++ b/lxmldump.py Wed May 26 02:02:55 2021 +0300 @@ -36,7 +36,7 @@ PKK_MODE_NORMAL: "normal", PKK_MODE_DUMP: "dump", PKK_MODE_XML: "xml", -# PKK_MODE_ANKI: "anki", + PKK_MODE_ANKI: "anki", } @@ -48,41 +48,46 @@ }, "word_fmt": { - PKK_MODE_NORMAL: "\"{text}\"", + PKK_MODE_NORMAL: "\"{word}\"{attr}\n", + PKK_MODE_ANKI: "{word}{attr}\n", }, - "word_attr_fmt": { - PKK_MODE_NORMAL: " ({alist})", + "word_attr_list": { + PKK_MODE_NORMAL: " ({alist}) ", }, - "word_attr_sep": { + "word_attr_list_empty": { + PKK_MODE_NORMAL: " ", + }, + "word_attr_list_sep": { PKK_MODE_NORMAL: " ; ", - PKK_MODE_ANKI: ":", - }, - "word_eol": { - PKK_MODE_NORMAL: "\n", + PKK_MODE_ANKI: " : ", }, "sense_index": { - PKK_MODE_NORMAL: "sense #{index}\n", + PKK_MODE_NORMAL: "{indent}sense #{index}\n", + PKK_MODE_ANKI: "[{index}]:\n", }, "search_fmt": { - PKK_MODE_NORMAL: "srch \"{text}\"\n", + PKK_MODE_NORMAL: "{indent}srch \"{text}\"\n", + PKK_MODE_ANKI: "{text}\n", }, "definition_fmt": { - PKK_MODE_NORMAL: "defn \"{text}\"\n", + PKK_MODE_NORMAL: "{indent}defn \"{text}\"\n", + PKK_MODE_ANKI: "? {text}\n", }, "example_fmt": { - PKK_MODE_NORMAL: "exmp \"{text}\"{geostr}\n", + PKK_MODE_NORMAL: "{indent}exmp \"{text}\"{geostr}\n", + PKK_MODE_ANKI: "- {text}{geostr}\n", }, "example_geo_list": { PKK_MODE_NORMAL: " ({glist})", }, - "example_geo_empty": { + "example_geo_list_empty": { PKK_MODE_NORMAL: "", }, - "example_geo_sep": { + "example_geo_list_sep": { PKK_MODE_NORMAL: ", ", }, @@ -137,9 +142,14 @@ sys.stderr.close() +## Get indentation string +def pkk_geti(indent): + return " " * pkk_cfg.indent * indent + + ## Print string with indentation def pkk_printi(indent, smsg): - pkk_print((" " * pkk_cfg.indent * indent) + smsg) + pkk_print(pkk_geti(indent) + smsg) ## Fatal error handler @@ -285,7 +295,9 @@ def pkk_output_subs(indent, dnode, dsub, dfmtname): dfmt = pkk_get_fmt(dfmtname) for qnode in dnode.findall(dsub): - pkk_printi(indent, dfmt.format(text=pkk_node_to_text(qnode))) + pkk_print(dfmt.format( + text=pkk_node_to_text(qnode), + indent=pkk_geti(indent))) ## Output a main "Headword" or "Sense" node under it @@ -301,12 +313,14 @@ geolist.append("{} [{}]".format(pkk_node_to_text(qnode), qnode.attrib["class"])) if len(geolist) > 0: - geostr = pkk_get_fmt("example_geo_list").format(glist=pkk_get_fmt("example_geo_sep").join(geolist)) + geostr = pkk_get_fmt("example_geo_list").format(glist=pkk_get_fmt("example_geo_list_sep").join(geolist)) else: - geostr = pkk_get_fmt("example_geo_empty") + geostr = pkk_get_fmt("example_geo_list_empty") - pkk_printi(indent + 1, pkk_get_fmt("example_fmt").format( - text=pkk_node_to_text(wnode.find("./Example")), geostr=geostr)) + pkk_print(pkk_get_fmt("example_fmt").format( + text=pkk_node_to_text(wnode.find("./Example")), + geostr=geostr, + indent=pkk_geti(indent + 1))) ## Output one "DictionaryEntry" node @@ -325,14 +339,17 @@ tmpl = list(set(tmpl)) tmpl.sort(reverse=False, key=lambda attr: (attr, len(attr))) - # Print the headword and attributes if any - pkk_output_subs(indent, wnode, "./Headword", "word_fmt") + if len(tmpl) > 0: + astr = pkk_get_fmt("word_attr_list").format( + alist=pkk_get_fmt("word_attr_list_sep").join(tmpl)) + else: + astr = pkk_get_fmt("word_attr_list_empty") - if len(tmpl) > 0: - pkk_print(pkk_get_fmt("word_attr_fmt").format( - alist=pkk_get_fmt("word_attr_sep").join(tmpl))) - - pkk_print(pkk_get_fmt("word_eol")) + # Print the headword and attributes if any + pkk_print(pkk_get_fmt("word_fmt").format( + word=pkk_node_to_text(wnode.find("./Headword")), + attr=astr, + indent=pkk_geti(indent))) # Print main "sense" pkk_output_sense(indent + 1, wnode) @@ -340,7 +357,9 @@ # Print any other "senses" index = 1 for wnode in dnode.findall("./SenseGrp"): - pkk_printi(indent + 1, pkk_get_fmt("sense_index").format(index=index)) + pkk_print(pkk_get_fmt("sense_index").format( + index=index, + indent=pkk_geti(indent + 1))) pkk_output_sense(indent + 2, wnode) index += 1