Mercurial > hg > lxmldump
comparison lxmldump.py @ 20:f274504eafd0
Use Python's argparse module instead of a custom hand-rolled argument parser.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 11 May 2021 17:13:38 +0300 |
parents | 7c6eb57798bd |
children | 7ef08e05a5bf |
comparison
equal
deleted
inserted
replaced
19:7c6eb57798bd | 20:f274504eafd0 |
---|---|
11 import signal | 11 import signal |
12 import re | 12 import re |
13 from pathlib import Path | 13 from pathlib import Path |
14 import xml.etree.ElementTree as xmlET | 14 import xml.etree.ElementTree as xmlET |
15 import unicodedata | 15 import unicodedata |
16 import argparse | |
16 | 17 |
17 assert sys.version_info >= (3, 7) | 18 assert sys.version_info >= (3, 7) |
18 | 19 |
19 | 20 |
20 ### | 21 ### |
21 ### Default settings | 22 ### Default settings |
22 ### | 23 ### |
23 pkk_cfg = { | |
24 "verbosity": 3, | |
25 "annotate": False, | |
26 "mode": 0, | |
27 "normalize": False, | |
28 "debug": False, | |
29 } | |
30 | |
31 | |
32 pkk_str_fmap = { | 24 pkk_str_fmap = { |
33 "Fragment" : ["<", ">"], | 25 "Fragment" : ["<", ">"], |
34 } | 26 } |
35 | 27 |
36 | 28 |
54 | 46 |
55 | 47 |
56 ## Print string to stdout using normalized Unicode if enabled | 48 ## Print string to stdout using normalized Unicode if enabled |
57 def pkk_print(smsg): | 49 def pkk_print(smsg): |
58 try: | 50 try: |
59 if pkk_cfg["normalize"]: | 51 if pkk_cfg.normalize: |
60 sys.stdout.write(unicodedata.normalize("NFC", smsg)) | 52 sys.stdout.write(unicodedata.normalize("NFC", smsg)) |
61 else: | 53 else: |
62 sys.stdout.write(smsg) | 54 sys.stdout.write(smsg) |
63 | 55 |
64 except (BrokenPipeError, IOError) as e: | 56 except (BrokenPipeError, IOError) as e: |
70 pkk_print((" " * indent) + smsg) | 62 pkk_print((" " * indent) + smsg) |
71 | 63 |
72 | 64 |
73 ## Check value against current verbosity level | 65 ## Check value against current verbosity level |
74 def pkk_verbosity(lvl): | 66 def pkk_verbosity(lvl): |
75 return pkk_cfg["verbosity"] >= lvl | 67 return pkk_cfg.verbosity >= lvl |
76 | 68 |
77 | 69 |
78 ## Fatal error handler | 70 ## Fatal error handler |
79 def pkk_fatal(smsg): | 71 def pkk_fatal(smsg): |
80 print(u"ERROR: "+ smsg) | 72 print(u"ERROR: "+ smsg) |
107 if pnode.tag == "Ptr": | 99 if pnode.tag == "Ptr": |
108 stmp += pkk_ptr_to_text(pnode) | 100 stmp += pkk_ptr_to_text(pnode) |
109 else: | 101 else: |
110 if isinstance(pnode.text, str): | 102 if isinstance(pnode.text, str): |
111 ptext = pkk_str_clean(pnode.text).strip() | 103 ptext = pkk_str_clean(pnode.text).strip() |
112 if pkk_cfg["annotate"] and isinstance(pnode.tag, str) and pnode.tag in pkk_str_fmap: | 104 if pkk_cfg.annotate and isinstance(pnode.tag, str) and pnode.tag in pkk_str_fmap: |
113 stmp += pkk_str_fmap[pnode.tag][0] + ptext + pkk_str_fmap[pnode.tag][1] | 105 stmp += pkk_str_fmap[pnode.tag][0] + ptext + pkk_str_fmap[pnode.tag][1] |
114 else: | 106 else: |
115 stmp += ptext | 107 stmp += ptext |
116 | 108 |
117 if isinstance(pnode.tail, str): | 109 if isinstance(pnode.tail, str): |
212 ### | 204 ### |
213 ### Main program starts | 205 ### Main program starts |
214 ### | 206 ### |
215 signal.signal(signal.SIGINT, pkk_signal_handler) | 207 signal.signal(signal.SIGINT, pkk_signal_handler) |
216 | 208 |
217 | 209 optparser = argparse.ArgumentParser( |
218 ### Check if we have arguments | 210 description="lxmldump - Dump ISO/FDIS 1951 XML file data", |
219 pkk_show_help = False | 211 usage="%(prog)s [options] <input xml file(s)>", |
220 pkk_filenames = [] | 212 epilog="\n\n" |
221 argc = 1 | 213 ) |
222 while argc < len(sys.argv): | 214 |
223 arg = sys.argv[argc] | 215 optparser.add_argument("filenames", action="extend", nargs="*", |
224 | 216 type=str, metavar="filename", help="XML filename(s)") |
225 needs_param = False | 217 |
226 if argc + 1 < len(sys.argv): | 218 optparser.add_argument("-d", "--dump", |
227 param = sys.argv[argc + 1] | 219 action="store_const", const=1, default=0, |
228 else: | 220 dest="mode", help="output as simple dump") |
229 param = None | 221 |
230 | 222 optparser.add_argument("-x", "--xml", |
231 # Check for option type arg | 223 action="store_const", const=2, |
232 if arg[0:1] == "-": | 224 dest="mode", help="output as XML") |
233 oarg = arg | 225 |
234 arg = arg.lstrip("-") | 226 optparser.add_argument("-n", "--normalize", |
235 | 227 action="store_const", const=True, default=False, |
236 if arg == "help" or arg == "h": | 228 dest="normalize", help="output NFC normalized Unicode") |
237 pkk_show_help = True | 229 |
238 elif arg == "dump" or arg == "d": | 230 optparser.add_argument("-a", "--annotate", |
239 pkk_cfg["mode"] = 1 | 231 action="store_const", const=True, default=False, |
240 elif arg == "xml" or arg == "x": | 232 dest="annotate", help="annotate strings") |
241 pkk_cfg["mode"] = 2 | 233 |
242 elif arg == "normalize" or arg == "n": | 234 optparser.add_argument("-v", "--verbosity", |
243 pkk_cfg["normalize"] = True | 235 type=int, choices=range(0,4), default=3, |
244 elif arg == "annotate" or arg == "a": | 236 metavar="n", |
245 pkk_cfg["annotate"] = True | 237 dest="verbosity", help='set verbosity level (0-3, default: %(default)s)') |
246 elif arg == "p": | 238 |
247 pkk_cfg["debug"] = True | 239 optparser.add_argument("-p", "--debug", |
248 elif arg == "verbosity" or arg == "v": | 240 action="store_const", const=True, default=False, |
249 needs_param = True | 241 dest="debug", help=argparse.SUPPRESS) |
250 pkk_cfg["verbosity"] = param | 242 |
251 else: | 243 |
252 pkk_fatal(u"Invalid option argument '{0}'.".format(oarg)) | 244 ### Show help if needed |
253 | 245 pkk_cfg = optparser.parse_args() |
254 if needs_param and param == None: | 246 if len(pkk_cfg.filenames) == 0: |
255 pkk_fatal(u"Option '{0}' requires an argument.".format(oarg)) | 247 optparser.print_help() |
256 else: | |
257 # Non-option argument | |
258 pkk_filenames.append(arg) | |
259 | |
260 if needs_param: | |
261 argc += 2 | |
262 else: | |
263 argc += 1 | |
264 | |
265 | |
266 ### Show help if requested | |
267 if pkk_show_help or len(pkk_filenames) == 0: | |
268 print(u"lxmldump - Dump ISO/FDIS 1951 XML file data") | |
269 print(u"Usage: {0} <options> <input xml file(s)>". | |
270 format(str(Path(sys.argv[0]).name))) | |
271 print(u"") | |
272 print(u" --help Show this help") | |
273 print(u" -d, --dump Output as simple dump") | |
274 print(u" -x, --xml Output as XML") | |
275 print(u" -n, --normalize Output NFC normalized Unicode") | |
276 print(u" -a, --annotate Annotate strings") | |
277 print(u" -v, --verbosity <n> Set verbosity level (0 - 3)") | |
278 print(u"") | |
279 sys.exit(0) | 248 sys.exit(0) |
280 | 249 |
281 | 250 |
282 ### Validate settings | |
283 try: | |
284 pkk_cfg["verbosity"] = int(pkk_cfg["verbosity"]) | |
285 except Exception as e: | |
286 pkk_fatal(u"Verbosity level is not a valid integer.") | |
287 if pkk_cfg["verbosity"] < 0 or pkk_cfg["verbosity"] > 3: | |
288 pkk_fatal(u"Invalid verbosity level value {0}.".format(pkk_cfg["verbosity"])) | |
289 | |
290 | |
291 ### Handle each input file | 251 ### Handle each input file |
292 for filename in pkk_filenames: | 252 for filename in pkk_cfg.filenames: |
293 # Parse XML file into element tree | 253 # Parse XML file into element tree |
294 try: | 254 try: |
295 uxml = xmlET.parse(filename) | 255 uxml = xmlET.parse(filename) |
296 except Exception as e: | 256 except Exception as e: |
297 pkk_fatal(u"SVG/XML parsing failed: {0}".format(str(e))) | 257 pkk_fatal(u"SVG/XML parsing failed: {0}".format(str(e))) |
299 # Dump output | 259 # Dump output |
300 try: | 260 try: |
301 xroot = uxml.getroot() | 261 xroot = uxml.getroot() |
302 for dnode in xroot.findall("./DictionaryEntry"): | 262 for dnode in xroot.findall("./DictionaryEntry"): |
303 | 263 |
304 if pkk_cfg["debug"] and dnode.attrib["identifier"] not in pkk_debug_list: | 264 if pkk_cfg.debug and dnode.attrib["identifier"] not in pkk_debug_list: |
305 continue | 265 continue |
306 | 266 |
307 if pkk_cfg["mode"] == 0: | 267 if pkk_cfg.mode == 0: |
308 try: | 268 try: |
309 pkk_output_node(0, dnode) | 269 pkk_output_node(0, dnode) |
310 except Exception as e: | 270 except Exception as e: |
311 pkk_dump_recursive(0, dnode) | 271 pkk_dump_recursive(0, dnode) |
312 print(str(e)) | 272 print(str(e)) |
313 sys.exit(0) | 273 sys.exit(0) |
314 elif pkk_cfg["mode"] == 1: | 274 elif pkk_cfg.mode == 1: |
315 pkk_dump_recursive(0, dnode) | 275 pkk_dump_recursive(0, dnode) |
316 elif pkk_cfg["mode"] == 2: | 276 elif pkk_cfg.mode == 2: |
317 pkk_print(str(xmlET.tostring(dnode, encoding="utf8")) + "\n") | 277 pkk_print(str(xmlET.tostring(dnode, encoding="utf8")) + "\n") |
318 else: | 278 else: |
319 pkk_fatal("Invalid operation mode?") | 279 pkk_fatal("Invalid operation mode?") |
320 | 280 |
321 print("\n") | 281 print("\n") |