# HG changeset patch # User Matti Hamalainen # Date 1620726555 -10800 # Node ID 3bd772fd6a50afba728e2d6b18114871cad2476f # Parent d50e71642be74593b0559c3093e81bd7ae9093d7 Cleanups. diff -r d50e71642be7 -r 3bd772fd6a50 lxmldump.py --- a/lxmldump.py Tue May 11 11:46:42 2021 +0300 +++ b/lxmldump.py Tue May 11 12:49:15 2021 +0300 @@ -54,7 +54,7 @@ return 0 -## Wrapper for print() +## Print string to stdout using normalized Unicode if enabled def pkk_print(smsg): try: if pkk_cfg["normalize"]: @@ -66,10 +66,12 @@ sys.stderr.close() +## Print string with indentation def pkk_printi(indent, smsg): pkk_print((" " * indent) + smsg) +## Check value against current verbosity level def pkk_verbosity(lvl): return pkk_cfg["verbosity"] >= lvl @@ -87,17 +89,19 @@ sys.exit(1) -## +## Clean string by removing tabs and newlines def pkk_str_clean(mstr): return re.sub(r'[\n\r\t]', '', mstr) +## Format "Ptr" node as text def pkk_ptr_to_text(pnode): return "PTR: <{}>{}".format( pnode.attrib["{http://www.w3.org/TR/xlink}href"], ("".join(pnode.itertext())).strip()) +## Get text inside a given node def pkk_get_text(lnode): stmp = "" for pnode in lnode.iter(): @@ -117,7 +121,7 @@ return stmp.strip() -## +## Simple recursive dump starting at given node def pkk_dump_recursive(indent, lnode): if lnode.tag in ["Example"]: stmp = pkk_get_text(lnode) @@ -140,20 +144,19 @@ pkk_dump_recursive(indent + 1, qnode) -## -def pkk_output_one(indent, dnode, dsub, dfmt): +## Output item under given node +def pkk_output_subs_fmt(indent, dnode, dsub, dname, dfmt): for qnode in dnode.findall(dsub): - pkk_printi(indent, dfmt.format(pkk_get_text(qnode))) + pkk_printi(indent, dfmt.format(dname, pkk_get_text(qnode))) -def pkk_output_subs(indent, dnode, dsub, dname): - for qnode in dnode.findall(dsub): - pkk_printi(indent, "{} \"{}\"\n".format(dname, pkk_get_text(qnode))) +def pkk_output_subs_prefix(indent, dnode, dsub, dname): + pkk_output_subs_fmt(indent, dnode, dsub, dname, "{0} \"{1}\"\n") def pkk_output_sense(indent, dnode): - pkk_output_subs(indent, dnode, "./SearchForm", "srch") - pkk_output_subs(indent, dnode, "./Definition", "defn") + pkk_output_subs_prefix(indent, dnode, "./SearchForm", "srch") + pkk_output_subs_prefix(indent, dnode, "./Definition", "defn") for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): sstr = pkk_get_text(wnode.find("./Example")) @@ -173,7 +176,7 @@ def pkk_output_node(indent, dnode): for wnode in dnode.findall("./HeadwordCtn"): - pkk_output_one (indent, wnode, "./Headword", "\"{}\":\n") + pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\":\n") pkk_output_sense(indent + 1, wnode) index = 1