0
|
1 #!/usr/bin/python3 -B
|
|
2 # coding=utf-8
|
|
3 ###
|
4
|
4 ### lxmldump - Dump ISO/FDIS 1951 XML file data
|
|
5 ### Programmed and designed by Matti 'ccr' Hämäläinen <ccr@tnsp.org>
|
|
6 ### (C) Copyright 2021 Tecnic Software productions (TNSP)
|
|
7 ###
|
|
8 ### Python 3.7+ required!
|
0
|
9 ###
|
|
10 import sys
|
|
11 import signal
|
|
12 import re
|
|
13 from pathlib import Path
|
|
14 import xml.etree.ElementTree as xmlET
|
|
15
|
|
# Refuse to run on interpreters older than Python 3.7.
# An explicit check is used instead of `assert`, because assert statements
# are removed when Python runs with -O and the guard would silently vanish.
if sys.version_info < (3, 7):
    sys.exit("ERROR: Python 3.7+ required!")
|
|
17
|
|
18
|
|
19 ###
|
|
20 ### Default settings
|
|
21 ###
|
|
pkk_cfg = {
    # Threshold used by pkk_print(): a message is printed when this value
    # is greater than or equal to the message's level.
    "verbosity": 1,
    # When True, every DictionaryEntry is dumped recursively instead of
    # being summarised on one line.
    "dump": False,
}
|
|
26
|
|
27
|
|
28 ###
|
|
29 ### Misc. helper functions, etc
|
|
30 ###
|
|
def pkk_cleanup():
    """Cleanup hook invoked before the program exits.

    There is currently nothing to release, so the function does no work.

    Returns:
        Always 0.
    """
    return 0
|
|
33
|
|
34
|
|
35 ## Wrapper for print()
|
|
def pkk_print(level, smsg):
    """Print a message only if the configured verbosity allows it.

    Args:
        level: Verbosity level of the message.
        smsg: Text to print.

    The message is printed when pkk_cfg["verbosity"] >= level and is
    dropped otherwise.
    """
    enabled = pkk_cfg["verbosity"] >= level
    if not enabled:
        return
    print(smsg)
|
|
39
|
|
40
|
|
41 ## Fatal error handler
|
|
def pkk_fatal(smsg):
    """Report a fatal error and terminate the program.

    Args:
        smsg: Error description; it is printed prefixed with "ERROR: ".

    Raises:
        SystemExit: Always, with exit status 1. The function never returns.
    """
    message = u"ERROR: " + smsg
    print(message)
    # Equivalent to sys.exit(1), which raises this same exception.
    raise SystemExit(1)
|
|
45
|
|
46
|
|
47 ## Handler for SIGINT signals
|
|
def pkk_signal_handler(signal, frame):
    """Handle SIGINT (Ctrl+C): clean up, tell the user and exit.

    Args:
        signal: Signal number passed in by the runtime (unused). Inside this
            function the name shadows the imported `signal` module.
        frame: Stack frame that was interrupted (unused).

    Raises:
        SystemExit: Always, with exit status 1.
    """
    pkk_cleanup()
    farewell = u"\nQuitting due to SIGINT / Ctrl+C!"
    print(farewell)
    # Equivalent to sys.exit(1), which raises this same exception.
    raise SystemExit(1)
|
|
52
|
|
53
|
|
54 ###
|
|
55 ### Main program starts
|
|
56 ###
|
|
# Install the Ctrl+C handler before doing any real work.
signal.signal(signal.SIGINT, pkk_signal_handler)


### Check if we have arguments
pkk_show_help = False
pkk_filenames = []
argc = 1
while argc < len(sys.argv):
    arg = sys.argv[argc]

    # Look ahead to the following argument. It is only consumed (argc += 2)
    # when the current option turns out to take a parameter.
    needs_param = False
    if argc + 1 < len(sys.argv):
        param = sys.argv[argc + 1]
    else:
        param = None

    # Check for option type arg
    if arg.startswith("-"):
        # Keep the option as typed for error messages; any number of
        # leading dashes is accepted ("-d", "--dump", ...).
        oarg = arg
        arg = arg.lstrip("-")

        if arg in ("help", "h"):
            pkk_show_help = True
        elif arg in ("dump", "d"):
            pkk_cfg["dump"] = True
        elif arg in ("v", "verbosity"):
            needs_param = True
            # Validate before storing: the value must exist and be an
            # integer, because pkk_print() compares it numerically.
            if param is None:
                pkk_fatal(u"Option '{0}' requires an argument.".format(oarg))
            try:
                pkk_cfg["verbosity"] = int(param)
            except ValueError:
                pkk_fatal(u"Invalid verbosity value '{0}' for option '{1}'.".
                    format(param, oarg))
        else:
            pkk_fatal(u"Invalid option argument '{0}'.".format(oarg))
    else:
        # Non-option argument
        pkk_filenames.append(arg)

    if needs_param:
        argc += 2
    else:
        argc += 1
|
|
98
|
|
99
|
|
100 ### Show help if requested
|
|
# Print usage and stop when help was requested or no input files were given.
if pkk_show_help or not pkk_filenames:
    print(u"lxmldump - Dump ISO/FDIS 1951 XML file data")
    print(u"Usage: {0} <options> <input xml file(s)>".
        format(Path(sys.argv[0]).name))
    print(u"")
    # The argument parser accepts both "-h" and "--help", so list both.
    print(u" -h, --help Show this help")
    # NOTE: -v/--verbosity is accepted by the argument parser, but its help
    # line is disabled in the original source and is kept disabled here.
#    print(u" -v, --verbosity <0-3> Set verbosity")
    print(u" -d, --dump Dump mode")
    print(u"")
    sys.exit(0)
|
|
111
|
|
112
|
|
113
|
|
114
|
|
115 ###
|
|
116 ### Main
|
|
117 ###
|
|
def pkk_recursive_dump(lnode, indent):
    """Print an element and all of its descendants, one line per element.

    Elements are visited depth-first in document order. Each output line
    consists of one space per indent level, the element tag, a space, the
    attribute mapping as formatted by str.format() and, when the element's
    text is non-blank, a space followed by the stripped text in double quotes.

    Args:
        lnode: Element to dump. It must provide `tag`, `attrib` and `text`
            and be iterable over its direct children, as
            xml.etree.ElementTree.Element is.
        indent: Nesting depth of lnode; children are printed at indent + 1.
    """
    # Elements without text have text == None; treat that like blank text.
    text = "" if lnode.text is None else str(lnode.text).strip()
    stmp = " \"" + text + "\"" if text else ""

    print("{}{} {}{}".format(" " * indent, lnode.tag, lnode.attrib, stmp))

    # Iterating an element yields its direct children in document order.
    for qnode in lnode:
        pkk_recursive_dump(qnode, indent + 1)
|
|
128
|
|
129
|
|
for filename in pkk_filenames:
    # Parse XML file into element tree
    try:
        uxml = xmlET.parse(filename)
    except Exception as e:
        # Top-level boundary: any failure to read or parse the file is
        # reported to the user and ends the program.
        pkk_fatal(u"SVG/XML parsing failed: {0}".format(str(e)))

    # Dump output
    try:
        xroot = uxml.getroot()
        for dnode in xroot.findall("./DictionaryEntry"):
            if pkk_cfg["dump"]:
                # Dump mode: print the whole entry subtree, then a separator.
                pkk_recursive_dump(dnode, 0)
                print("\n\n")
            else:
                # Summary mode: "<search forms> : <definitions>" per entry.
                # An element with no text has text == None; "or" maps that
                # to an empty string so the literal "None" is never printed.
                wlist = []
                dlist = []
                for wnode in dnode.findall("./HeadwordCtn"):
                    wlist.extend((qnode.text or "").strip()
                        for qnode in wnode.findall("./SearchForm"))
                    dlist.extend((qnode.text or "").strip()
                        for qnode in wnode.findall("./Definition"))

                for wnode in dnode.findall("./SenseGrp"):
                    dlist.extend((qnode.text or "").strip()
                        for qnode in wnode.findall("./Definition"))

                print("{} : {}".format(", ".join(wlist), " ; ".join(dlist)))

    except OSError:
        # IOError is an alias of OSError and BrokenPipeError is a subclass,
        # so this catches the same errors as before (e.g. output piped into
        # a program that exits early).
        # NOTE(review): stderr is presumably closed so that no further error
        # text is emitted while the interpreter shuts down - confirm.
        sys.stderr.close()
        sys.exit(1)

pkk_cleanup()
sys.exit(0)
|