# HG changeset patch
# User Matti Hamalainen
# Date 1621935603 -10800
# Node ID 34755af2ea1f50f8edbf95cef94b869ea73a6637
# Parent  f91ef7d7615bd226bc79f3d9d713faf1231ffc68
Make Ptr field URL formatting configurable.

diff -r f91ef7d7615b -r 34755af2ea1f lxmldump.py
--- a/lxmldump.py	Tue May 25 12:39:04 2021 +0300
+++ b/lxmldump.py	Tue May 25 12:40:03 2021 +0300
@@ -30,6 +30,19 @@
 PKK_MODE_XML = 2
 
 
+pkk_modes_list = {
+    PKK_MODE_NORMAL: "normal",
+    PKK_MODE_DUMP: "dump",
+    PKK_MODE_XML: "xml",
+}
+
+
+# Default Ptr URL format strings
+pkk_ptr_url_fmt = {
+    PKK_MODE_NORMAL: u"{text}",
+}
+
+
 # Element annotation mappings
 pkk_element_annotation_map = {
     "Fragment" : {
@@ -109,10 +122,19 @@
 
 ## Format a "Ptr" node as text
 def pkk_ptr_to_text(pnode):
-    phref = pnode.attrib["{http://www.w3.org/TR/xlink}href"]
-    ptext = ("".join(pnode.itertext())).strip()
+    # If custom format set, use it
+    if pkk_cfg.ptr_url_fmt != None:
+        pfmt = pkk_cfg.ptr_url_fmt
+    elif pkk_cfg.mode in pkk_ptr_url_fmt:
+        # Else try mode-specific
+        pfmt = pkk_ptr_url_fmt[pkk_cfg.mode]
+    else:
+        # Last resort is normal mode format
+        pfmt = pkk_ptr_url_fmt[PKK_MODE_NORMAL]
 
-    return f"{ptext}"
+    return pfmt.format(
+        text=("".join(pnode.itertext())).strip(),
+        href=pnode.attrib["{http://www.w3.org/TR/xlink}href"])
 
 
 ## Get text inside a given node
@@ -246,6 +268,13 @@
     action="store_const", const=PKK_MODE_XML,
     help="output as XML")
 
+optparser.add_argument("--ptr-url-fmt",
+    dest="ptr_url_fmt",
+    type=str,
+    default=None,
+    metavar="str",
+    help='Ptr URL format string (see below)')
+
 optparser.add_argument("-n", "--normalize",
     dest="normalize",
     action="store_true",