Mercurial > hg > lxmldump
comparison lxmldump.py @ 5:274b2091137c
Some more work on cleaning this up.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 10 May 2021 21:38:11 +0300 |
parents | 60b789dfee32 |
children | 34a89d61dbe7 |
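comparison
Legend (row types): equal, deleted, inserted, replaced.
Left column is revision 4:60b789dfee32, right column is revision 5:274b2091137c. A row with only one populated cell is a deleted (old revision only) or inserted (new revision only) line; compare its line number with the neighbouring rows to tell which revision it belongs to.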
4:60b789dfee32 | 5:274b2091137c |
---|---|
10 import sys | 10 import sys |
11 import signal | 11 import signal |
12 import re | 12 import re |
13 from pathlib import Path | 13 from pathlib import Path |
14 import xml.etree.ElementTree as xmlET | 14 import xml.etree.ElementTree as xmlET |
15 import unicodedata | |
15 | 16 |
16 assert sys.version_info >= (3, 7) | 17 assert sys.version_info >= (3, 7) |
17 | 18 |
18 | 19 |
19 ### | 20 ### |
20 ### Default settings | 21 ### Default settings |
21 ### | 22 ### |
22 pkk_cfg = { | 23 pkk_cfg = { |
23 "verbosity": 1, | |
24 "dump": False, | 24 "dump": False, |
25 "normalize": False, | |
25 } | 26 } |
26 | 27 |
27 | 28 |
28 ### | 29 ### |
29 ### Misc. helper functions, etc | 30 ### Misc. helper functions, etc |
31 def pkk_cleanup(): | 32 def pkk_cleanup(): |
32 return 0 | 33 return 0 |
33 | 34 |
34 | 35 |
35 ## Wrapper for print() | 36 ## Wrapper for print() |
36 def pkk_print(level, smsg): | 37 def pkk_print(smsg): |
37 if pkk_cfg["verbosity"] >= level: | 38 if pkk_cfg["normalize"]: |
38 print(smsg) | 39 sys.stdout.write(unicodedata.normalize("NFC", smsg)) |
40 else: | |
41 sys.stdout.write(smsg) | |
39 | 42 |
40 | 43 |
41 ## Fatal error handler | 44 ## Fatal error handler |
42 def pkk_fatal(smsg): | 45 def pkk_fatal(smsg): |
43 print(u"ERROR: "+ smsg) | 46 print(u"ERROR: "+ smsg) |
77 | 80 |
78 if arg == "help" or arg == "h": | 81 if arg == "help" or arg == "h": |
79 pkk_show_help = True | 82 pkk_show_help = True |
80 elif arg == "dump" or arg == "d": | 83 elif arg == "dump" or arg == "d": |
81 pkk_cfg["dump"] = True | 84 pkk_cfg["dump"] = True |
82 elif arg == "v" or arg == "verbosity": | 85 elif arg == "normalize" or arg == "n": |
83 needs_param = True | 86 pkk_cfg["normalize"] = True |
84 pkk_cfg["verbosity"] = param | |
85 else: | 87 else: |
86 pkk_fatal(u"Invalid option argument '{0}'.".format(oarg)) | 88 pkk_fatal(u"Invalid option argument '{0}'.".format(oarg)) |
87 | 89 |
88 if needs_param and param == None: | 90 if needs_param and param == None: |
89 pkk_fatal(u"Option '{0}' requires an argument.".format(oarg)) | 91 pkk_fatal(u"Option '{0}' requires an argument.".format(oarg)) |
102 print(u"lxmldump - Dump ISO/FDIS 1951 XML file data") | 104 print(u"lxmldump - Dump ISO/FDIS 1951 XML file data") |
103 print(u"Usage: {0} <options> <input xml file(s)>". | 105 print(u"Usage: {0} <options> <input xml file(s)>". |
104 format(str(Path(sys.argv[0]).name))) | 106 format(str(Path(sys.argv[0]).name))) |
105 print(u"") | 107 print(u"") |
106 print(u" --help Show this help") | 108 print(u" --help Show this help") |
107 # print(u" -v, --verbosity <0-3> Set verbosity") | |
108 print(u" -d, --dump Dump mode") | 109 print(u" -d, --dump Dump mode") |
109 print(u"") | 110 print(u"") |
110 sys.exit(0) | 111 sys.exit(0) |
111 | 112 |
112 | 113 |
113 | |
114 | |
115 ### | 114 ### |
116 ### Main | 115 ### Main |
117 ### | 116 ### |
118 def pkk_recursive_dump(lnode, indent): | 117 def pkk_dump_simple_node(lnode, indent): |
119 stmp = "" | 118 stmp = "" |
120 if lnode.text != None: | 119 if lnode.text != None: |
121 tmp = str(lnode.text).strip() | 120 tmp = str(lnode.text).strip() |
122 if tmp != "": | 121 if tmp != "": |
123 stmp = " \""+ tmp +"\"" | 122 stmp = " \""+ tmp +"\"" |
124 | 123 |
125 print("{}{} {}{}".format(" " * indent, lnode.tag, lnode.attrib, stmp)) | 124 pkk_print("{}{} {}{}".format(" " * indent, lnode.tag, lnode.attrib, stmp)) |
126 for qnode in lnode.findall("./*"): | 125 for qnode in lnode.findall("./*"): |
127 pkk_recursive_dump(qnode, indent + 1) | 126 pkk_dump_simple_node(qnode, indent + 1) |
127 | |
128 | |
129 def pkk_dump_node(dnode): | |
130 wlist = [] | |
131 dlist = [] | |
132 for wnode in dnode.findall("./HeadwordCtn"): | |
133 for qnode in wnode.findall("./SearchForm"): | |
134 wlist.append(str(qnode.text).strip()) | |
135 | |
136 for qnode in wnode.findall("./Definition"): | |
137 dlist.append(str(qnode.text).strip()) | |
138 | |
139 for wnode in dnode.findall("./SenseGrp"): | |
140 for qnode in wnode.findall("./Definition"): | |
141 dlist.append(str(qnode.text).strip()) | |
142 | |
143 pkk_print("{} : {}".format(", ".join(wlist), " ; ".join(dlist))) | |
128 | 144 |
129 | 145 |
130 for filename in pkk_filenames: | 146 for filename in pkk_filenames: |
131 # Parse XML file into element tree | 147 # Parse XML file into element tree |
132 try: | 148 try: |
137 # Dump output | 153 # Dump output |
138 try: | 154 try: |
139 xroot = uxml.getroot() | 155 xroot = uxml.getroot() |
140 for dnode in xroot.findall("./DictionaryEntry"): | 156 for dnode in xroot.findall("./DictionaryEntry"): |
141 if pkk_cfg["dump"]: | 157 if pkk_cfg["dump"]: |
142 pkk_recursive_dump(dnode, 0) | 158 pkk_dump_simple_node(dnode, 0) |
143 print("\n\n") | 159 print("\n\n") |
144 else: | 160 else: |
145 wlist = [] | 161 pkk_dump_node(dnode) |
146 dlist = [] | |
147 for wnode in dnode.findall("./HeadwordCtn"): | |
148 for qnode in wnode.findall("./SearchForm"): | |
149 wlist.append(str(qnode.text).strip()) | |
150 for qnode in wnode.findall("./Definition"): | |
151 dlist.append(str(qnode.text).strip()) | |
152 | |
153 for wnode in dnode.findall("./SenseGrp"): | |
154 for qnode in wnode.findall("./Definition"): | |
155 dlist.append(str(qnode.text).strip()) | |
156 | |
157 print("{} : {}".format(", ".join(wlist), " ; ".join(dlist))) | |
158 | 162 |
159 except (BrokenPipeError, IOError) as e: | 163 except (BrokenPipeError, IOError) as e: |
160 sys.stderr.close() | 164 sys.stderr.close() |
161 sys.exit(1) | 165 sys.exit(1) |
162 | 166 |