Mercurial > hg > lxmldump
changeset 30:34755af2ea1f
Make Ptr field URL formatting configurable.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 25 May 2021 12:40:03 +0300 |
parents | f91ef7d7615b |
children | 4cbefe4c6f53 |
files | lxmldump.py |
diffstat | 1 files changed, 32 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/lxmldump.py Tue May 25 12:39:04 2021 +0300
+++ b/lxmldump.py Tue May 25 12:40:03 2021 +0300
@@ -30,6 +30,19 @@
 PKK_MODE_XML = 2
 
 
+pkk_modes_list = {
+    PKK_MODE_NORMAL: "normal",
+    PKK_MODE_DUMP: "dump",
+    PKK_MODE_XML: "xml",
+}
+
+
+# Default Ptr URL format strings
+pkk_ptr_url_fmt = {
+    PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>",
+}
+
+
 # Element annotation mappings
 pkk_element_annotation_map = {
     "Fragment" : {
@@ -109,10 +122,19 @@
 
 ## Format a "Ptr" node as text
 def pkk_ptr_to_text(pnode):
-    phref = pnode.attrib["{http://www.w3.org/TR/xlink}href"]
-    ptext = ("".join(pnode.itertext())).strip()
+    # If custom format set, use it
+    if pkk_cfg.ptr_url_fmt != None:
+        pfmt = pkk_cfg.ptr_url_fmt
+    elif pkk_cfg.mode in pkk_ptr_url_fmt:
+        # Else try mode-specific
+        pfmt = pkk_ptr_url_fmt[pkk_cfg.mode]
+    else:
+        # Last resort is normal mode format
+        pfmt = pkk_ptr_url_fmt[PKK_MODE_NORMAL]
 
-    return f"<PTR:{phref}>{ptext}</PTR>"
+    return pfmt.format(
+        text=("".join(pnode.itertext())).strip(),
+        href=pnode.attrib["{http://www.w3.org/TR/xlink}href"])
 
 
 ## Get text inside a given node
@@ -246,6 +268,13 @@
     action="store_const", const=PKK_MODE_XML,
     help="output as XML")
+optparser.add_argument("--ptr-url-fmt",
+    dest="ptr_url_fmt",
+    type=str,
+    default=None,
+    metavar="str",
+    help='Ptr URL format string (see below)')
+
 optparser.add_argument("-n", "--normalize",
     dest="normalize",
     action="store_true",
```