# HG changeset patch # User Matti Hamalainen # Date 1620721667 -10800 # Node ID 013f0cd9e5b34bb88ead6f8ad84bff0acb48c864 # Parent 2a8c65d22f8647237e048a698b843b4cbe03fac7 More work. diff -r 2a8c65d22f86 -r 013f0cd9e5b3 lxmldump.py --- a/lxmldump.py Tue May 11 09:33:21 2021 +0300 +++ b/lxmldump.py Tue May 11 11:27:47 2021 +0300 @@ -24,8 +24,9 @@ "verbosity": 3, "annotate": False, - "dump": False, + "mode": 0, "normalize": False, + "xml": False, "debug": False, } @@ -82,9 +83,15 @@ ## +def pkk_str_clean(mstr): + return re.sub(r'[\n\r\t]', '', mstr) + + def pkk_ptr_to_text(pnode): -# return "PTR: <{}>".format("".join(pnode.itertext()).strip()) - return "PTR: <{}>".format(xmlET.tostring(pnode)) +# return "PTR: {}".format(xmlET.tostring(pnode)) + return "PTR: <{}>{}".format( + pnode.attrib["{http://www.w3.org/TR/xlink}href"], + ("".join(pnode.itertext())).strip()) def pkk_get_text(lnode): @@ -94,14 +101,14 @@ stmp += pkk_ptr_to_text(pnode) else: if isinstance(pnode.text, str): - ptext = pnode.text + ptext = pkk_str_clean(pnode.text).strip() if pkk_cfg["annotate"] and isinstance(pnode.tag, str) and pnode.tag in pkk_str_fmap: stmp += pkk_str_fmap[pnode.tag][0] + ptext + pkk_str_fmap[pnode.tag][1] else: stmp += ptext if isinstance(pnode.tail, str): - stmp += " "+ pnode.tail.strip() + stmp += pkk_str_clean(pnode.tail) return stmp.strip() @@ -113,14 +120,14 @@ pkk_printi(indent, "{} \"{}\"\n".format(lnode.tag, stmp)) else: if isinstance(lnode.text, str): - stmp = lnode.text.strip() + stmp = pkk_str_clean(lnode.text).strip() if stmp != "": stmp = " \""+ stmp +"\"" else: stmp = "" if len(lnode.attrib) > 0: - atmp = " "+str(lnode.attrib) + atmp = " "+ str(lnode.attrib) else: atmp = "" @@ -198,7 +205,9 @@ if arg == "help" or arg == "h": pkk_show_help = True elif arg == "dump" or arg == "d": - pkk_cfg["dump"] = True + pkk_cfg["mode"] = 1 + elif arg == "xml" or arg == "x": + pkk_cfg["mode"] = 2 elif arg == "normalize" or arg == "n": pkk_cfg["normalize"] = True elif arg == "annotate" or arg == "a": @@ -263,10 +272,14 @@ if pkk_cfg["debug"] and dnode.attrib["identifier"] not in pkk_debug_list: continue - if pkk_cfg["dump"]: + if pkk_cfg["mode"] == 0: + pkk_output_node(0, dnode) + elif pkk_cfg["mode"] == 1: pkk_dump_recursive(0, dnode) + elif pkk_cfg["mode"] == 2: + pkk_print(str(xmlET.tostring(dnode, encoding="utf8")) + "\n") else: - pkk_output_node(0, dnode) + pkk_fatal("Invalid operation mode?") print("\n")