comparison lxmldump.py @ 5:274b2091137c

Some more work on cleaning this up.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 10 May 2021 21:38:11 +0300
parents 60b789dfee32
children 34a89d61dbe7
comparison
equal deleted inserted replaced
4:60b789dfee32 5:274b2091137c
10 import sys 10 import sys
11 import signal 11 import signal
12 import re 12 import re
13 from pathlib import Path 13 from pathlib import Path
14 import xml.etree.ElementTree as xmlET 14 import xml.etree.ElementTree as xmlET
15 import unicodedata
15 16
16 assert sys.version_info >= (3, 7) 17 assert sys.version_info >= (3, 7)
17 18
18 19
19 ### 20 ###
20 ### Default settings 21 ### Default settings
21 ### 22 ###
22 pkk_cfg = { 23 pkk_cfg = {
23 "verbosity": 1,
24 "dump": False, 24 "dump": False,
25 "normalize": False,
25 } 26 }
26 27
27 28
28 ### 29 ###
29 ### Misc. helper functions, etc 30 ### Misc. helper functions, etc
31 def pkk_cleanup(): 32 def pkk_cleanup():
32 return 0 33 return 0
33 34
34 35
35 ## Wrapper for print() 36 ## Wrapper for print()
36 def pkk_print(level, smsg): 37 def pkk_print(smsg):
37 if pkk_cfg["verbosity"] >= level: 38 if pkk_cfg["normalize"]:
38 print(smsg) 39 sys.stdout.write(unicodedata.normalize("NFC", smsg))
40 else:
41 sys.stdout.write(smsg)
39 42
40 43
41 ## Fatal error handler 44 ## Fatal error handler
42 def pkk_fatal(smsg): 45 def pkk_fatal(smsg):
43 print(u"ERROR: "+ smsg) 46 print(u"ERROR: "+ smsg)
77 80
78 if arg == "help" or arg == "h": 81 if arg == "help" or arg == "h":
79 pkk_show_help = True 82 pkk_show_help = True
80 elif arg == "dump" or arg == "d": 83 elif arg == "dump" or arg == "d":
81 pkk_cfg["dump"] = True 84 pkk_cfg["dump"] = True
82 elif arg == "v" or arg == "verbosity": 85 elif arg == "normalize" or arg == "n":
83 needs_param = True 86 pkk_cfg["normalize"] = True
84 pkk_cfg["verbosity"] = param
85 else: 87 else:
86 pkk_fatal(u"Invalid option argument '{0}'.".format(oarg)) 88 pkk_fatal(u"Invalid option argument '{0}'.".format(oarg))
87 89
88 if needs_param and param == None: 90 if needs_param and param == None:
89 pkk_fatal(u"Option '{0}' requires an argument.".format(oarg)) 91 pkk_fatal(u"Option '{0}' requires an argument.".format(oarg))
102 print(u"lxmldump - Dump ISO/FDIS 1951 XML file data") 104 print(u"lxmldump - Dump ISO/FDIS 1951 XML file data")
103 print(u"Usage: {0} <options> <input xml file(s)>". 105 print(u"Usage: {0} <options> <input xml file(s)>".
104 format(str(Path(sys.argv[0]).name))) 106 format(str(Path(sys.argv[0]).name)))
105 print(u"") 107 print(u"")
106 print(u" --help Show this help") 108 print(u" --help Show this help")
107 # print(u" -v, --verbosity <0-3> Set verbosity")
108 print(u" -d, --dump Dump mode") 109 print(u" -d, --dump Dump mode")
109 print(u"") 110 print(u"")
110 sys.exit(0) 111 sys.exit(0)
111 112
112 113
113
114
115 ### 114 ###
116 ### Main 115 ### Main
117 ### 116 ###
118 def pkk_recursive_dump(lnode, indent): 117 def pkk_dump_simple_node(lnode, indent):
119 stmp = "" 118 stmp = ""
120 if lnode.text != None: 119 if lnode.text != None:
121 tmp = str(lnode.text).strip() 120 tmp = str(lnode.text).strip()
122 if tmp != "": 121 if tmp != "":
123 stmp = " \""+ tmp +"\"" 122 stmp = " \""+ tmp +"\""
124 123
125 print("{}{} {}{}".format(" " * indent, lnode.tag, lnode.attrib, stmp)) 124 pkk_print("{}{} {}{}".format(" " * indent, lnode.tag, lnode.attrib, stmp))
126 for qnode in lnode.findall("./*"): 125 for qnode in lnode.findall("./*"):
127 pkk_recursive_dump(qnode, indent + 1) 126 pkk_dump_simple_node(qnode, indent + 1)
127
128
129 def pkk_dump_node(dnode):
130 wlist = []
131 dlist = []
132 for wnode in dnode.findall("./HeadwordCtn"):
133 for qnode in wnode.findall("./SearchForm"):
134 wlist.append(str(qnode.text).strip())
135
136 for qnode in wnode.findall("./Definition"):
137 dlist.append(str(qnode.text).strip())
138
139 for wnode in dnode.findall("./SenseGrp"):
140 for qnode in wnode.findall("./Definition"):
141 dlist.append(str(qnode.text).strip())
142
143 pkk_print("{} : {}".format(", ".join(wlist), " ; ".join(dlist)))
128 144
129 145
130 for filename in pkk_filenames: 146 for filename in pkk_filenames:
131 # Parse XML file into element tree 147 # Parse XML file into element tree
132 try: 148 try:
137 # Dump output 153 # Dump output
138 try: 154 try:
139 xroot = uxml.getroot() 155 xroot = uxml.getroot()
140 for dnode in xroot.findall("./DictionaryEntry"): 156 for dnode in xroot.findall("./DictionaryEntry"):
141 if pkk_cfg["dump"]: 157 if pkk_cfg["dump"]:
142 pkk_recursive_dump(dnode, 0) 158 pkk_dump_simple_node(dnode, 0)
143 print("\n\n") 159 print("\n\n")
144 else: 160 else:
145 wlist = [] 161 pkk_dump_node(dnode)
146 dlist = []
147 for wnode in dnode.findall("./HeadwordCtn"):
148 for qnode in wnode.findall("./SearchForm"):
149 wlist.append(str(qnode.text).strip())
150 for qnode in wnode.findall("./Definition"):
151 dlist.append(str(qnode.text).strip())
152
153 for wnode in dnode.findall("./SenseGrp"):
154 for qnode in wnode.findall("./Definition"):
155 dlist.append(str(qnode.text).strip())
156
157 print("{} : {}".format(", ".join(wlist), " ; ".join(dlist)))
158 162
159 except (BrokenPipeError, IOError) as e: 163 except (BrokenPipeError, IOError) as e:
160 sys.stderr.close() 164 sys.stderr.close()
161 sys.exit(1) 165 sys.exit(1)
162 166