comparison lxmldump.py @ 10:013f0cd9e5b3

More work.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 11 May 2021 11:27:47 +0300
parents 2a8c65d22f86
children b2ac68732e45
comparison
equal deleted inserted replaced
9:2a8c65d22f86 10:013f0cd9e5b3
22 ### 22 ###
23 pkk_cfg = { 23 pkk_cfg = {
24 "verbosity": 3, 24 "verbosity": 3,
25 25
26 "annotate": False, 26 "annotate": False,
27 "dump": False, 27 "mode": 0,
28 "normalize": False, 28 "normalize": False,
29 "xml": False,
29 30
30 "debug": False, 31 "debug": False,
31 } 32 }
32 33
33 34
80 print(u"\nQuitting due to SIGINT / Ctrl+C!") 81 print(u"\nQuitting due to SIGINT / Ctrl+C!")
81 sys.exit(1) 82 sys.exit(1)
82 83
83 84
84 ## 85 ##
86 def pkk_str_clean(mstr):
87 return re.sub(r'[\n\r\t]', '', mstr)
88
89
85 def pkk_ptr_to_text(pnode): 90 def pkk_ptr_to_text(pnode):
86 # return "PTR: <{}>".format("".join(pnode.itertext()).strip()) 91 # return "PTR: {}".format(xmlET.tostring(pnode))
87 return "PTR: <{}>".format(xmlET.tostring(pnode)) 92 return "PTR: <{}>{}</>".format(
93 pnode.attrib["{http://www.w3.org/TR/xlink}href"],
94 ("".join(pnode.itertext())).strip())
88 95
89 96
90 def pkk_get_text(lnode): 97 def pkk_get_text(lnode):
91 stmp = "" 98 stmp = ""
92 for pnode in lnode.iter(): 99 for pnode in lnode.iter():
93 if pnode.tag == "Ptr": 100 if pnode.tag == "Ptr":
94 stmp += pkk_ptr_to_text(pnode) 101 stmp += pkk_ptr_to_text(pnode)
95 else: 102 else:
96 if isinstance(pnode.text, str): 103 if isinstance(pnode.text, str):
97 ptext = pnode.text 104 ptext = pkk_str_clean(pnode.text).strip()
98 if pkk_cfg["annotate"] and isinstance(pnode.tag, str) and pnode.tag in pkk_str_fmap: 105 if pkk_cfg["annotate"] and isinstance(pnode.tag, str) and pnode.tag in pkk_str_fmap:
99 stmp += pkk_str_fmap[pnode.tag][0] + ptext + pkk_str_fmap[pnode.tag][1] 106 stmp += pkk_str_fmap[pnode.tag][0] + ptext + pkk_str_fmap[pnode.tag][1]
100 else: 107 else:
101 stmp += ptext 108 stmp += ptext
102 109
103 if isinstance(pnode.tail, str): 110 if isinstance(pnode.tail, str):
104 stmp += " "+ pnode.tail.strip() 111 stmp += pkk_str_clean(pnode.tail)
105 112
106 return stmp.strip() 113 return stmp.strip()
107 114
108 115
109 ## 116 ##
111 if lnode.tag in ["Example"]: 118 if lnode.tag in ["Example"]:
112 stmp = pkk_get_text(lnode) 119 stmp = pkk_get_text(lnode)
113 pkk_printi(indent, "{} \"{}\"\n".format(lnode.tag, stmp)) 120 pkk_printi(indent, "{} \"{}\"\n".format(lnode.tag, stmp))
114 else: 121 else:
115 if isinstance(lnode.text, str): 122 if isinstance(lnode.text, str):
116 stmp = lnode.text.strip() 123 stmp = pkk_str_clean(lnode.text).strip()
117 if stmp != "": 124 if stmp != "":
118 stmp = " \""+ stmp +"\"" 125 stmp = " \""+ stmp +"\""
119 else: 126 else:
120 stmp = "" 127 stmp = ""
121 128
122 if len(lnode.attrib) > 0: 129 if len(lnode.attrib) > 0:
123 atmp = " "+str(lnode.attrib) 130 atmp = " "+ str(lnode.attrib)
124 else: 131 else:
125 atmp = "" 132 atmp = ""
126 133
127 pkk_printi(indent, "{}{}{}\n".format(lnode.tag, atmp, stmp)) 134 pkk_printi(indent, "{}{}{}\n".format(lnode.tag, atmp, stmp))
128 for qnode in lnode.findall("./*"): 135 for qnode in lnode.findall("./*"):
196 arg = arg.lstrip("-") 203 arg = arg.lstrip("-")
197 204
198 if arg == "help" or arg == "h": 205 if arg == "help" or arg == "h":
199 pkk_show_help = True 206 pkk_show_help = True
200 elif arg == "dump" or arg == "d": 207 elif arg == "dump" or arg == "d":
201 pkk_cfg["dump"] = True 208 pkk_cfg["mode"] = 1
209 elif arg == "xml" or arg == "x":
210 pkk_cfg["mode"] = 2
202 elif arg == "normalize" or arg == "n": 211 elif arg == "normalize" or arg == "n":
203 pkk_cfg["normalize"] = True 212 pkk_cfg["normalize"] = True
204 elif arg == "annotate" or arg == "a": 213 elif arg == "annotate" or arg == "a":
205 pkk_cfg["annotate"] = True 214 pkk_cfg["annotate"] = True
206 elif arg == "p": 215 elif arg == "p":
261 for dnode in xroot.findall("./DictionaryEntry"): 270 for dnode in xroot.findall("./DictionaryEntry"):
262 271
263 if pkk_cfg["debug"] and dnode.attrib["identifier"] not in pkk_debug_list: 272 if pkk_cfg["debug"] and dnode.attrib["identifier"] not in pkk_debug_list:
264 continue 273 continue
265 274
266 if pkk_cfg["dump"]: 275 if pkk_cfg["mode"] == 0:
276 pkk_output_node(0, dnode)
277 elif pkk_cfg["mode"] == 1:
267 pkk_dump_recursive(0, dnode) 278 pkk_dump_recursive(0, dnode)
279 elif pkk_cfg["mode"] == 2:
280 pkk_print(str(xmlET.tostring(dnode, encoding="utf8")) + "\n")
268 else: 281 else:
269 pkk_output_node(0, dnode) 282 pkk_fatal("Invalid operation mode?")
270 283
271 print("\n") 284 print("\n")
272 285
273 except (BrokenPipeError, IOError) as e: 286 except (BrokenPipeError, IOError) as e:
274 sys.stderr.close() 287 sys.stderr.close()