Mercurial > hg > lxmldump
comparison lxmldump.py @ 10:013f0cd9e5b3
More work.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 11 May 2021 11:27:47 +0300 |
parents | 2a8c65d22f86 |
children | b2ac68732e45 |
comparison
equal
deleted
inserted
replaced
9:2a8c65d22f86 | 10:013f0cd9e5b3 |
---|---|
22 ### | 22 ### |
23 pkk_cfg = { | 23 pkk_cfg = { |
24 "verbosity": 3, | 24 "verbosity": 3, |
25 | 25 |
26 "annotate": False, | 26 "annotate": False, |
27 "dump": False, | 27 "mode": 0, |
28 "normalize": False, | 28 "normalize": False, |
29 "xml": False, | |
29 | 30 |
30 "debug": False, | 31 "debug": False, |
31 } | 32 } |
32 | 33 |
33 | 34 |
80 print(u"\nQuitting due to SIGINT / Ctrl+C!") | 81 print(u"\nQuitting due to SIGINT / Ctrl+C!") |
81 sys.exit(1) | 82 sys.exit(1) |
82 | 83 |
83 | 84 |
84 ## | 85 ## |
86 def pkk_str_clean(mstr): | |
87 return re.sub(r'[\n\r\t]', '', mstr) | |
88 | |
89 | |
85 def pkk_ptr_to_text(pnode): | 90 def pkk_ptr_to_text(pnode): |
86 # return "PTR: <{}>".format("".join(pnode.itertext()).strip()) | 91 # return "PTR: {}".format(xmlET.tostring(pnode)) |
87 return "PTR: <{}>".format(xmlET.tostring(pnode)) | 92 return "PTR: <{}>{}</>".format( |
93 pnode.attrib["{http://www.w3.org/TR/xlink}href"], | |
94 ("".join(pnode.itertext())).strip()) | |
88 | 95 |
89 | 96 |
90 def pkk_get_text(lnode): | 97 def pkk_get_text(lnode): |
91 stmp = "" | 98 stmp = "" |
92 for pnode in lnode.iter(): | 99 for pnode in lnode.iter(): |
93 if pnode.tag == "Ptr": | 100 if pnode.tag == "Ptr": |
94 stmp += pkk_ptr_to_text(pnode) | 101 stmp += pkk_ptr_to_text(pnode) |
95 else: | 102 else: |
96 if isinstance(pnode.text, str): | 103 if isinstance(pnode.text, str): |
97 ptext = pnode.text | 104 ptext = pkk_str_clean(pnode.text).strip() |
98 if pkk_cfg["annotate"] and isinstance(pnode.tag, str) and pnode.tag in pkk_str_fmap: | 105 if pkk_cfg["annotate"] and isinstance(pnode.tag, str) and pnode.tag in pkk_str_fmap: |
99 stmp += pkk_str_fmap[pnode.tag][0] + ptext + pkk_str_fmap[pnode.tag][1] | 106 stmp += pkk_str_fmap[pnode.tag][0] + ptext + pkk_str_fmap[pnode.tag][1] |
100 else: | 107 else: |
101 stmp += ptext | 108 stmp += ptext |
102 | 109 |
103 if isinstance(pnode.tail, str): | 110 if isinstance(pnode.tail, str): |
104 stmp += " "+ pnode.tail.strip() | 111 stmp += pkk_str_clean(pnode.tail) |
105 | 112 |
106 return stmp.strip() | 113 return stmp.strip() |
107 | 114 |
108 | 115 |
109 ## | 116 ## |
111 if lnode.tag in ["Example"]: | 118 if lnode.tag in ["Example"]: |
112 stmp = pkk_get_text(lnode) | 119 stmp = pkk_get_text(lnode) |
113 pkk_printi(indent, "{} \"{}\"\n".format(lnode.tag, stmp)) | 120 pkk_printi(indent, "{} \"{}\"\n".format(lnode.tag, stmp)) |
114 else: | 121 else: |
115 if isinstance(lnode.text, str): | 122 if isinstance(lnode.text, str): |
116 stmp = lnode.text.strip() | 123 stmp = pkk_str_clean(lnode.text).strip() |
117 if stmp != "": | 124 if stmp != "": |
118 stmp = " \""+ stmp +"\"" | 125 stmp = " \""+ stmp +"\"" |
119 else: | 126 else: |
120 stmp = "" | 127 stmp = "" |
121 | 128 |
122 if len(lnode.attrib) > 0: | 129 if len(lnode.attrib) > 0: |
123 atmp = " "+str(lnode.attrib) | 130 atmp = " "+ str(lnode.attrib) |
124 else: | 131 else: |
125 atmp = "" | 132 atmp = "" |
126 | 133 |
127 pkk_printi(indent, "{}{}{}\n".format(lnode.tag, atmp, stmp)) | 134 pkk_printi(indent, "{}{}{}\n".format(lnode.tag, atmp, stmp)) |
128 for qnode in lnode.findall("./*"): | 135 for qnode in lnode.findall("./*"): |
196 arg = arg.lstrip("-") | 203 arg = arg.lstrip("-") |
197 | 204 |
198 if arg == "help" or arg == "h": | 205 if arg == "help" or arg == "h": |
199 pkk_show_help = True | 206 pkk_show_help = True |
200 elif arg == "dump" or arg == "d": | 207 elif arg == "dump" or arg == "d": |
201 pkk_cfg["dump"] = True | 208 pkk_cfg["mode"] = 1 |
209 elif arg == "xml" or arg == "x": | |
210 pkk_cfg["mode"] = 2 | |
202 elif arg == "normalize" or arg == "n": | 211 elif arg == "normalize" or arg == "n": |
203 pkk_cfg["normalize"] = True | 212 pkk_cfg["normalize"] = True |
204 elif arg == "annotate" or arg == "a": | 213 elif arg == "annotate" or arg == "a": |
205 pkk_cfg["annotate"] = True | 214 pkk_cfg["annotate"] = True |
206 elif arg == "p": | 215 elif arg == "p": |
261 for dnode in xroot.findall("./DictionaryEntry"): | 270 for dnode in xroot.findall("./DictionaryEntry"): |
262 | 271 |
263 if pkk_cfg["debug"] and dnode.attrib["identifier"] not in pkk_debug_list: | 272 if pkk_cfg["debug"] and dnode.attrib["identifier"] not in pkk_debug_list: |
264 continue | 273 continue |
265 | 274 |
266 if pkk_cfg["dump"]: | 275 if pkk_cfg["mode"] == 0: |
276 pkk_output_node(0, dnode) | |
277 elif pkk_cfg["mode"] == 1: | |
267 pkk_dump_recursive(0, dnode) | 278 pkk_dump_recursive(0, dnode) |
279 elif pkk_cfg["mode"] == 2: | |
280 pkk_print(str(xmlET.tostring(dnode, encoding="utf8")) + "\n") | |
268 else: | 281 else: |
269 pkk_output_node(0, dnode) | 282 pkk_fatal("Invalid operation mode?") |
270 | 283 |
271 print("\n") | 284 print("\n") |
272 | 285 |
273 except (BrokenPipeError, IOError) as e: | 286 except (BrokenPipeError, IOError) as e: |
274 sys.stderr.close() | 287 sys.stderr.close() |