Mercurial > hg > lxmldump
comparison lxmldump.py @ 30:34755af2ea1f
Make Ptr field URL formatting configurable.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 25 May 2021 12:40:03 +0300 |
parents | f91ef7d7615b |
children | 4cbefe4c6f53 |
comparison
equal
deleted
inserted
replaced
29:f91ef7d7615b | 30:34755af2ea1f |
---|---|
26 ### | 26 ### |
27 # Operation modes | 27 # Operation modes |
28 PKK_MODE_NORMAL = 0 | 28 PKK_MODE_NORMAL = 0 |
29 PKK_MODE_DUMP = 1 | 29 PKK_MODE_DUMP = 1 |
30 PKK_MODE_XML = 2 | 30 PKK_MODE_XML = 2 |
31 | |
32 | |
33 pkk_modes_list = { | |
34 PKK_MODE_NORMAL: "normal", | |
35 PKK_MODE_DUMP: "dump", | |
36 PKK_MODE_XML: "xml", | |
37 } | |
38 | |
39 | |
40 # Default Ptr URL format strings | |
41 pkk_ptr_url_fmt = { | |
42 PKK_MODE_NORMAL: u"<PTR:{href}>{text}</PTR>", | |
43 } | |
31 | 44 |
32 | 45 |
33 # Element annotation mappings | 46 # Element annotation mappings |
34 pkk_element_annotation_map = { | 47 pkk_element_annotation_map = { |
35 "Fragment" : { | 48 "Fragment" : { |
107 return re.sub(r'[\n\r\t]', '', mstr) | 120 return re.sub(r'[\n\r\t]', '', mstr) |
108 | 121 |
109 | 122 |
110 ## Format a "Ptr" node as text | 123 ## Format a "Ptr" node as text |
111 def pkk_ptr_to_text(pnode): | 124 def pkk_ptr_to_text(pnode): |
112 phref = pnode.attrib["{http://www.w3.org/TR/xlink}href"] | 125 # If custom format set, use it |
113 ptext = ("".join(pnode.itertext())).strip() | 126 if pkk_cfg.ptr_url_fmt != None: |
114 | 127 pfmt = pkk_cfg.ptr_url_fmt |
115 return f"<PTR:{phref}>{ptext}</PTR>" | 128 elif pkk_cfg.mode in pkk_ptr_url_fmt: |
129 # Else try mode-specific | |
130 pfmt = pkk_ptr_url_fmt[pkk_cfg.mode] | |
131 else: | |
132 # Last resort is normal mode format | |
133 pfmt = pkk_ptr_url_fmt[PKK_MODE_NORMAL] | |
134 | |
135 return pfmt.format( | |
136 text=("".join(pnode.itertext())).strip(), | |
137 href=pnode.attrib["{http://www.w3.org/TR/xlink}href"]) | |
116 | 138 |
117 | 139 |
118 ## Get text inside a given node | 140 ## Get text inside a given node |
119 def pkk_node_to_text(lnode): | 141 def pkk_node_to_text(lnode): |
120 stmp = "" | 142 stmp = "" |
243 | 265 |
244 optparser.add_argument("-x", "--xml", | 266 optparser.add_argument("-x", "--xml", |
245 dest="mode", | 267 dest="mode", |
246 action="store_const", const=PKK_MODE_XML, | 268 action="store_const", const=PKK_MODE_XML, |
247 help="output as XML") | 269 help="output as XML") |
270 | |
271 optparser.add_argument("--ptr-url-fmt", | |
272 dest="ptr_url_fmt", | |
273 type=str, | |
274 default=None, | |
275 metavar="str", | |
276 help='Ptr URL format string (see below)') | |
248 | 277 |
249 optparser.add_argument("-n", "--normalize", | 278 optparser.add_argument("-n", "--normalize", |
250 dest="normalize", | 279 dest="normalize", |
251 action="store_true", | 280 action="store_true", |
252 help="output NFC normalized Unicode") | 281 help="output NFC normalized Unicode") |