# HG changeset patch # User Matti Hamalainen # Date 1622458756 -10800 # Node ID cbed8ee15701c330c474c5116298aefd10ebefa3 # Parent d52696972bf2f4397ef4ccfcf83a5351ab13d1d8 Cleanups. diff -r d52696972bf2 -r cbed8ee15701 lxmldump.py --- a/lxmldump.py Mon May 31 13:28:48 2021 +0300 +++ b/lxmldump.py Mon May 31 13:59:16 2021 +0300 @@ -268,14 +268,17 @@ ## Clean string by removing tabs and newlines def pkk_str_clean(mstr): - return re.sub(r'[\n\r\t]', '', mstr) + return re.sub(r'[\n\r\t]', '', mstr).strip() ## Format a "Ptr" node as text def pkk_ptr_to_text(pnode): pfmt = pkk_get_fmt("ptr_fmt") + ptext = ("".join(pnode.itertext())).strip() + if pkk_cfg.annotate: + ptext = re.sub(r'\s*\.\s*$', '', ptext) return pfmt.format( - text=re.sub(r'\.$', '', ("".join(pnode.itertext())).strip()), + text=ptext, href=pnode.attrib["{http://www.w3.org/TR/xlink}href"]) @@ -287,12 +290,12 @@ stmp += pkk_ptr_to_text(pnode) else: if isinstance(pnode.text, str): - stmp += pkk_str_annotate(pnode.tag, pkk_str_clean(pnode.text)) + stmp += pkk_str_annotate(pnode.tag, pnode.text) if isinstance(pnode.tail, str): - stmp += pkk_str_clean(pnode.tail) + stmp += pnode.tail - return re.sub(r'\.$', '', stmp.strip()) + return pkk_str_clean(re.sub(r'\s*\.\s*$', '', stmp)) ## Simple recursive dump starting at given node @@ -302,7 +305,7 @@ pkk_printi(indent, f"{lnode.tag} \"{stmp}\"\n") else: if isinstance(lnode.text, str): - textstr = pkk_str_clean(lnode.text).strip() + textstr = pkk_str_clean(lnode.text) if textstr != "": textstr = " \""+ textstr +"\"" else: