comparison lxmldump.py @ 30:34755af2ea1f

Make Ptr field URL formatting configurable.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 25 May 2021 12:40:03 +0300
parents f91ef7d7615b
children 4cbefe4c6f53
comparison
equal deleted inserted replaced
29:f91ef7d7615b 30:34755af2ea1f
26 ### 26 ###
27 # Operation modes 27 # Operation modes
28 PKK_MODE_NORMAL = 0 28 PKK_MODE_NORMAL = 0
29 PKK_MODE_DUMP = 1 29 PKK_MODE_DUMP = 1
30 PKK_MODE_XML = 2 30 PKK_MODE_XML = 2
31
32
33 pkk_modes_list = {
34 PKK_MODE_NORMAL: "normal",
35 PKK_MODE_DUMP: "dump",
36 PKK_MODE_XML: "xml",
37 }
38
39
40 # Default Ptr URL format strings
41 pkk_ptr_url_fmt = {
42 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>",
43 }
31 44
32 45
33 # Element annotation mappings 46 # Element annotation mappings
34 pkk_element_annotation_map = { 47 pkk_element_annotation_map = {
35 "Fragment" : { 48 "Fragment" : {
107 return re.sub(r'[\n\r\t]', '', mstr) 120 return re.sub(r'[\n\r\t]', '', mstr)
108 121
109 122
110 ## Format a "Ptr" node as text 123 ## Format a "Ptr" node as text
111 def pkk_ptr_to_text(pnode): 124 def pkk_ptr_to_text(pnode):
112 phref = pnode.attrib["{http://www.w3.org/TR/xlink}href"] 125 # If custom format set, use it
113 ptext = ("".join(pnode.itertext())).strip() 126 if pkk_cfg.ptr_url_fmt != None:
114 127 pfmt = pkk_cfg.ptr_url_fmt
115 return f"<PTR:{phref}>{ptext}</PTR>" 128 elif pkk_cfg.mode in pkk_ptr_url_fmt:
129 # Else try mode-specific
130 pfmt = pkk_ptr_url_fmt[pkk_cfg.mode]
131 else:
132 # Last resort is normal mode format
133 pfmt = pkk_ptr_url_fmt[PKK_MODE_NORMAL]
134
135 return pfmt.format(
136 text=("".join(pnode.itertext())).strip(),
137 href=pnode.attrib["{http://www.w3.org/TR/xlink}href"])
116 138
117 139
118 ## Get text inside a given node 140 ## Get text inside a given node
119 def pkk_node_to_text(lnode): 141 def pkk_node_to_text(lnode):
120 stmp = "" 142 stmp = ""
243 265
244 optparser.add_argument("-x", "--xml", 266 optparser.add_argument("-x", "--xml",
245 dest="mode", 267 dest="mode",
246 action="store_const", const=PKK_MODE_XML, 268 action="store_const", const=PKK_MODE_XML,
247 help="output as XML") 269 help="output as XML")
270
271 optparser.add_argument("--ptr-url-fmt",
272 dest="ptr_url_fmt",
273 type=str,
274 default=None,
275 metavar="str",
276 help='Ptr URL format string (see below)')
248 277
249 optparser.add_argument("-n", "--normalize", 278 optparser.add_argument("-n", "--normalize",
250 dest="normalize", 279 dest="normalize",
251 action="store_true", 280 action="store_true",
252 help="output NFC normalized Unicode") 281 help="output NFC normalized Unicode")