Mercurial > hg > lxmldump
annotate lxmldump.py @ 3:7ce08dea935b
Special case handling for Example elements.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 10 May 2021 12:24:10 +0300 |
parents | 0e5f705a895b |
children | 34a89d61dbe7 |
rev | line source |
---|---|
0 | 1 #!/usr/bin/python3 -B |
2 # coding=utf-8 | |
3 ### | |
4 ### ISO/FDIS 1951 lxmldump | |
5 ### | |
6 import sys | |
7 import signal | |
8 import re | |
9 from pathlib import Path | |
10 import xml.etree.ElementTree as xmlET | |
11 | |
# Refuse to run on interpreters older than Python 3.7. An explicit check is
# used instead of `assert`, because assertions are removed under `python -O`.
if sys.version_info < (3, 7):
    sys.exit("ERROR: Python 3.7 or newer is required.")
13 | |
14 | |
###
### Default settings
###
# Runtime configuration, updated by the command line parser:
#   "verbosity" - threshold that pkk_print() compares message levels against
#   "dump"      - when True, each entry is dumped recursively instead of
#                 being summarised on a single line
pkk_cfg = {
    "verbosity": 1,
    "dump": False,
}
22 | |
23 | |
###
### Misc. helper functions, etc
###
def pkk_cleanup():
    """Cleanup hook; currently performs no work and always returns 0."""
    return 0
29 | |
30 | |
## Wrapper for print()
def pkk_print(level, smsg):
    """Print `smsg` only if the configured verbosity is at least `level`.

    level -- minimum value of pkk_cfg["verbosity"] required for output
    smsg  -- message passed to print() unchanged
    """
    if pkk_cfg["verbosity"] >= level:
        print(smsg)
35 | |
36 | |
## Fatal error handler
def pkk_fatal(smsg):
    """Report a fatal error and terminate the program with exit status 1.

    smsg -- error description; it is printed with an "ERROR: " prefix.

    The message is written to stderr so that it cannot be mixed into the
    dump data written to stdout. Raises SystemExit via sys.exit().
    """
    print("ERROR: " + smsg, file=sys.stderr)
    sys.exit(1)
41 | |
42 | |
## Handler for SIGINT signals
def pkk_signal_handler(signal, frame):
    """Clean up and exit with status 1 when the program is interrupted.

    signal -- first argument supplied to the handler (unused)
    frame  -- second argument supplied to the handler (unused)

    NOTE: the `signal` parameter shadows the imported `signal` module inside
    this function. That is harmless here because neither is referenced.
    """
    pkk_cleanup()
    # Written to stderr: stdout may be a pipe that has already been closed.
    print("\nQuitting due to SIGINT / Ctrl+C!", file=sys.stderr)
    sys.exit(1)
48 | |
49 | |
###
### Main program starts
###
# Route Ctrl+C through pkk_signal_handler instead of the default handler.
signal.signal(signal.SIGINT, pkk_signal_handler)
54 | |
55 | |
### Check if we have arguments
# Hand-rolled option parser. Results:
#   pkk_show_help  - True when -h / --help was given
#   pkk_filenames  - every non-option argument, in command line order
#   pkk_cfg        - "dump" and "verbosity" are updated in place
pkk_show_help = False
pkk_filenames = []
argc = 1
while argc < len(sys.argv):
    arg = sys.argv[argc]

    # The following argv entry (if any) is the candidate option parameter
    needs_param = False
    param = sys.argv[argc + 1] if argc + 1 < len(sys.argv) else None

    # Check for option type arg
    if arg.startswith("-"):
        # Keep the original spelling for error messages; any number of
        # leading dashes is accepted, so "-d" and "--d" are equivalent.
        oarg = arg
        arg = arg.lstrip("-")

        if arg in ("help", "h"):
            pkk_show_help = True
        elif arg in ("dump", "d"):
            pkk_cfg["dump"] = True
        elif arg in ("v", "verbosity"):
            needs_param = True
            if param is None:
                pkk_fatal("Option '{0}' requires an argument.".format(oarg))
            # Store an integer: pkk_print() compares this value against
            # integer levels, which would fail for a raw argv string.
            try:
                pkk_cfg["verbosity"] = int(param)
            except ValueError:
                pkk_fatal("Invalid verbosity level '{0}'.".format(param))
        else:
            pkk_fatal("Invalid option argument '{0}'.".format(oarg))
    else:
        # Non-option argument
        pkk_filenames.append(arg)

    # Skip the consumed parameter as well when the option took one
    argc += 2 if needs_param else 1
94 | |
95 | |
### Show help if requested
# The usage text is also shown when no input files were given; in both
# cases the program then exits with status 0.
if pkk_show_help or not pkk_filenames:
    print("lxmldump - Dump ISO/FDIS 1951 XML file data")
    print("Usage: {0} <options> <input xml file(s)>".
        format(str(Path(sys.argv[0]).name)))
    print("")
    print(" --help Show this help")
    # NOTE: -v / --verbosity is accepted by the option parser, but its help
    # line is intentionally left disabled here.
#    print(" -v, --verbosity <0-3> Set verbosity")
    print(" -d, --dump Dump mode")
    print("")
    sys.exit(0)
107 | |
108 | |
109 | |
110 | |
###
### Main
###
def pkk_recursive_dump(lnode, indent):
    """Print an element and its descendants as an indented outline.

    lnode  -- xml.etree.ElementTree element to print
    indent -- nesting depth; each level adds one space of indentation

    "Example" elements are special-cased: all text contained in the element
    and its descendants is joined into one quoted string, and the children
    are not dumped separately. Any other element is printed as its tag,
    followed by its attribute dict (if non-empty) and its own leading text
    in quotes (if non-blank), after which each direct child is dumped one
    level deeper.
    """
    if lnode.tag == "Example":
        stmp = "".join(lnode.itertext()).strip()
        print("{}{} \"{}\"".format(" " * indent, lnode.tag, stmp))
    else:
        # Quoted leading text; omitted when absent or whitespace-only
        stmp = ""
        if lnode.text is not None:
            tmp = str(lnode.text).strip()
            if tmp:
                stmp = " \"" + tmp + "\""

        # Attributes are shown using the dict's own string representation
        atmp = " " + str(lnode.attrib) if lnode.attrib else ""

        print("{}{}{}{}".format(" " * indent, lnode.tag, atmp, stmp))
        for qnode in lnode.findall("./*"):
            pkk_recursive_dump(qnode, indent + 1)
0 | 133 |
134 | |
# Process each input file in turn: parse it, then either dump every
# DictionaryEntry recursively or print a one-line summary per entry.
for filename in pkk_filenames:
    # Parse XML file into element tree
    try:
        uxml = xmlET.parse(filename)
    except Exception as e:
        # Top-level boundary: any failure to read or parse the file is fatal
        pkk_fatal("XML parsing failed: {0}".format(str(e)))

    # Dump output
    try:
        xroot = uxml.getroot()
        for dnode in xroot.findall("./DictionaryEntry"):
            if pkk_cfg["dump"]:
                pkk_recursive_dump(dnode, 0)
                print("\n\n")
            else:
                # Summary line: "<search forms> : <definitions>".
                # An element's .text is None when it has no leading text;
                # treat that as empty rather than printing "None".
                wlist = []
                dlist = []
                for wnode in dnode.findall("./HeadwordCtn"):
                    for qnode in wnode.findall("./SearchForm"):
                        wlist.append((qnode.text or "").strip())
                    for qnode in wnode.findall("./Definition"):
                        dlist.append((qnode.text or "").strip())

                for wnode in dnode.findall("./SenseGrp"):
                    for qnode in wnode.findall("./Definition"):
                        dlist.append((qnode.text or "").strip())

                print("{} : {}".format(", ".join(wlist), " ; ".join(dlist)))

    except (BrokenPipeError, IOError):
        # Output could not be written (e.g. the reader of a pipe went away).
        # NOTE(review): stderr is presumably closed to suppress any further
        # error output during interpreter shutdown -- confirm intent.
        sys.stderr.close()
        sys.exit(1)

pkk_cleanup()
sys.exit(0)