comparison lxmldump.py @ 20:f274504eafd0

Use Python's argparse module instead of the custom hand-rolled argument parser.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 11 May 2021 17:13:38 +0300
parents 7c6eb57798bd
children 7ef08e05a5bf
comparison
equal deleted inserted replaced
19:7c6eb57798bd 20:f274504eafd0
11 import signal 11 import signal
12 import re 12 import re
13 from pathlib import Path 13 from pathlib import Path
14 import xml.etree.ElementTree as xmlET 14 import xml.etree.ElementTree as xmlET
15 import unicodedata 15 import unicodedata
16 import argparse
16 17
17 assert sys.version_info >= (3, 7) 18 assert sys.version_info >= (3, 7)
18 19
19 20
20 ### 21 ###
21 ### Default settings 22 ### Default settings
22 ### 23 ###
23 pkk_cfg = {
24 "verbosity": 3,
25 "annotate": False,
26 "mode": 0,
27 "normalize": False,
28 "debug": False,
29 }
30
31
32 pkk_str_fmap = { 24 pkk_str_fmap = {
33 "Fragment" : ["<", ">"], 25 "Fragment" : ["<", ">"],
34 } 26 }
35 27
36 28
54 46
55 47
56 ## Print string to stdout using normalized Unicode if enabled 48 ## Print string to stdout using normalized Unicode if enabled
57 def pkk_print(smsg): 49 def pkk_print(smsg):
58 try: 50 try:
59 if pkk_cfg["normalize"]: 51 if pkk_cfg.normalize:
60 sys.stdout.write(unicodedata.normalize("NFC", smsg)) 52 sys.stdout.write(unicodedata.normalize("NFC", smsg))
61 else: 53 else:
62 sys.stdout.write(smsg) 54 sys.stdout.write(smsg)
63 55
64 except (BrokenPipeError, IOError) as e: 56 except (BrokenPipeError, IOError) as e:
70 pkk_print((" " * indent) + smsg) 62 pkk_print((" " * indent) + smsg)
71 63
72 64
73 ## Check value against current verbosity level 65 ## Check value against current verbosity level
74 def pkk_verbosity(lvl): 66 def pkk_verbosity(lvl):
75 return pkk_cfg["verbosity"] >= lvl 67 return pkk_cfg.verbosity >= lvl
76 68
77 69
78 ## Fatal error handler 70 ## Fatal error handler
79 def pkk_fatal(smsg): 71 def pkk_fatal(smsg):
80 print(u"ERROR: "+ smsg) 72 print(u"ERROR: "+ smsg)
107 if pnode.tag == "Ptr": 99 if pnode.tag == "Ptr":
108 stmp += pkk_ptr_to_text(pnode) 100 stmp += pkk_ptr_to_text(pnode)
109 else: 101 else:
110 if isinstance(pnode.text, str): 102 if isinstance(pnode.text, str):
111 ptext = pkk_str_clean(pnode.text).strip() 103 ptext = pkk_str_clean(pnode.text).strip()
112 if pkk_cfg["annotate"] and isinstance(pnode.tag, str) and pnode.tag in pkk_str_fmap: 104 if pkk_cfg.annotate and isinstance(pnode.tag, str) and pnode.tag in pkk_str_fmap:
113 stmp += pkk_str_fmap[pnode.tag][0] + ptext + pkk_str_fmap[pnode.tag][1] 105 stmp += pkk_str_fmap[pnode.tag][0] + ptext + pkk_str_fmap[pnode.tag][1]
114 else: 106 else:
115 stmp += ptext 107 stmp += ptext
116 108
117 if isinstance(pnode.tail, str): 109 if isinstance(pnode.tail, str):
212 ### 204 ###
213 ### Main program starts 205 ### Main program starts
214 ### 206 ###
215 signal.signal(signal.SIGINT, pkk_signal_handler) 207 signal.signal(signal.SIGINT, pkk_signal_handler)
216 208
217 209 optparser = argparse.ArgumentParser(
218 ### Check if we have arguments 210 description="lxmldump - Dump ISO/FDIS 1951 XML file data",
219 pkk_show_help = False 211 usage="%(prog)s [options] <input xml file(s)>",
220 pkk_filenames = [] 212 epilog="\n\n"
221 argc = 1 213 )
222 while argc < len(sys.argv): 214
223 arg = sys.argv[argc] 215 optparser.add_argument("filenames", action="extend", nargs="*",
224 216 type=str, metavar="filename", help="XML filename(s)")
225 needs_param = False 217
226 if argc + 1 < len(sys.argv): 218 optparser.add_argument("-d", "--dump",
227 param = sys.argv[argc + 1] 219 action="store_const", const=1, default=0,
228 else: 220 dest="mode", help="output as simple dump")
229 param = None 221
230 222 optparser.add_argument("-x", "--xml",
231 # Check for option type arg 223 action="store_const", const=2,
232 if arg[0:1] == "-": 224 dest="mode", help="output as XML")
233 oarg = arg 225
234 arg = arg.lstrip("-") 226 optparser.add_argument("-n", "--normalize",
235 227 action="store_const", const=True, default=False,
236 if arg == "help" or arg == "h": 228 dest="normalize", help="output NFC normalized Unicode")
237 pkk_show_help = True 229
238 elif arg == "dump" or arg == "d": 230 optparser.add_argument("-a", "--annotate",
239 pkk_cfg["mode"] = 1 231 action="store_const", const=True, default=False,
240 elif arg == "xml" or arg == "x": 232 dest="annotate", help="annotate strings")
241 pkk_cfg["mode"] = 2 233
242 elif arg == "normalize" or arg == "n": 234 optparser.add_argument("-v", "--verbosity",
243 pkk_cfg["normalize"] = True 235 type=int, choices=range(0,4), default=3,
244 elif arg == "annotate" or arg == "a": 236 metavar="n",
245 pkk_cfg["annotate"] = True 237 dest="verbosity", help='set verbosity level (0-3, default: %(default)s)')
246 elif arg == "p": 238
247 pkk_cfg["debug"] = True 239 optparser.add_argument("-p", "--debug",
248 elif arg == "verbosity" or arg == "v": 240 action="store_const", const=True, default=False,
249 needs_param = True 241 dest="debug", help=argparse.SUPPRESS)
250 pkk_cfg["verbosity"] = param 242
251 else: 243
252 pkk_fatal(u"Invalid option argument '{0}'.".format(oarg)) 244 ### Show help if needed
253 245 pkk_cfg = optparser.parse_args()
254 if needs_param and param == None: 246 if len(pkk_cfg.filenames) == 0:
255 pkk_fatal(u"Option '{0}' requires an argument.".format(oarg)) 247 optparser.print_help()
256 else:
257 # Non-option argument
258 pkk_filenames.append(arg)
259
260 if needs_param:
261 argc += 2
262 else:
263 argc += 1
264
265
266 ### Show help if requested
267 if pkk_show_help or len(pkk_filenames) == 0:
268 print(u"lxmldump - Dump ISO/FDIS 1951 XML file data")
269 print(u"Usage: {0} <options> <input xml file(s)>".
270 format(str(Path(sys.argv[0]).name)))
271 print(u"")
272 print(u" --help Show this help")
273 print(u" -d, --dump Output as simple dump")
274 print(u" -x, --xml Output as XML")
275 print(u" -n, --normalize Output NFC normalized Unicode")
276 print(u" -a, --annotate Annotate strings")
277 print(u" -v, --verbosity <n> Set verbosity level (0 - 3)")
278 print(u"")
279 sys.exit(0) 248 sys.exit(0)
280 249
281 250
282 ### Validate settings
283 try:
284 pkk_cfg["verbosity"] = int(pkk_cfg["verbosity"])
285 except Exception as e:
286 pkk_fatal(u"Verbosity level is not a valid integer.")
287 if pkk_cfg["verbosity"] < 0 or pkk_cfg["verbosity"] > 3:
288 pkk_fatal(u"Invalid verbosity level value {0}.".format(pkk_cfg["verbosity"]))
289
290
291 ### Handle each input file 251 ### Handle each input file
292 for filename in pkk_filenames: 252 for filename in pkk_cfg.filenames:
293 # Parse XML file into element tree 253 # Parse XML file into element tree
294 try: 254 try:
295 uxml = xmlET.parse(filename) 255 uxml = xmlET.parse(filename)
296 except Exception as e: 256 except Exception as e:
297 pkk_fatal(u"SVG/XML parsing failed: {0}".format(str(e))) 257 pkk_fatal(u"SVG/XML parsing failed: {0}".format(str(e)))
299 # Dump output 259 # Dump output
300 try: 260 try:
301 xroot = uxml.getroot() 261 xroot = uxml.getroot()
302 for dnode in xroot.findall("./DictionaryEntry"): 262 for dnode in xroot.findall("./DictionaryEntry"):
303 263
304 if pkk_cfg["debug"] and dnode.attrib["identifier"] not in pkk_debug_list: 264 if pkk_cfg.debug and dnode.attrib["identifier"] not in pkk_debug_list:
305 continue 265 continue
306 266
307 if pkk_cfg["mode"] == 0: 267 if pkk_cfg.mode == 0:
308 try: 268 try:
309 pkk_output_node(0, dnode) 269 pkk_output_node(0, dnode)
310 except Exception as e: 270 except Exception as e:
311 pkk_dump_recursive(0, dnode) 271 pkk_dump_recursive(0, dnode)
312 print(str(e)) 272 print(str(e))
313 sys.exit(0) 273 sys.exit(0)
314 elif pkk_cfg["mode"] == 1: 274 elif pkk_cfg.mode == 1:
315 pkk_dump_recursive(0, dnode) 275 pkk_dump_recursive(0, dnode)
316 elif pkk_cfg["mode"] == 2: 276 elif pkk_cfg.mode == 2:
317 pkk_print(str(xmlET.tostring(dnode, encoding="utf8")) + "\n") 277 pkk_print(str(xmlET.tostring(dnode, encoding="utf8")) + "\n")
318 else: 278 else:
319 pkk_fatal("Invalid operation mode?") 279 pkk_fatal("Invalid operation mode?")
320 280
321 print("\n") 281 print("\n")