Mercurial > hg > lxmldump
comparison lxmldump.py @ 13:3bd772fd6a50
Cleanups.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 11 May 2021 12:49:15 +0300 |
parents | d50e71642be7 |
children | 7498bda8b4a2 |
comparison
equal
deleted
inserted
replaced
12:d50e71642be7 | 13:3bd772fd6a50 |
---|---|
52 ### | 52 ### |
53 def pkk_cleanup(): | 53 def pkk_cleanup(): |
54 return 0 | 54 return 0 |
55 | 55 |
56 | 56 |
57 ## Wrapper for print() | 57 ## Print string to stdout using normalized Unicode if enabled |
58 def pkk_print(smsg): | 58 def pkk_print(smsg): |
59 try: | 59 try: |
60 if pkk_cfg["normalize"]: | 60 if pkk_cfg["normalize"]: |
61 sys.stdout.write(unicodedata.normalize("NFC", smsg)) | 61 sys.stdout.write(unicodedata.normalize("NFC", smsg)) |
62 else: | 62 else: |
64 | 64 |
65 except (BrokenPipeError, IOError) as e: | 65 except (BrokenPipeError, IOError) as e: |
66 sys.stderr.close() | 66 sys.stderr.close() |
67 | 67 |
68 | 68 |
| | 69 ## Print string with indentation |
69 def pkk_printi(indent, smsg): | 70 def pkk_printi(indent, smsg): |
70 pkk_print((" " * indent) + smsg) | 71 pkk_print((" " * indent) + smsg) |
71 | 72 |
72 | 73 |
| | 74 ## Check value against current verbosity level |
73 def pkk_verbosity(lvl): | 75 def pkk_verbosity(lvl): |
74 return pkk_cfg["verbosity"] >= lvl | 76 return pkk_cfg["verbosity"] >= lvl |
75 | 77 |
76 | 78 |
77 ## Fatal error handler | 79 ## Fatal error handler |
85 pkk_cleanup() | 87 pkk_cleanup() |
86 print(u"\nQuitting due to SIGINT / Ctrl+C!") | 88 print(u"\nQuitting due to SIGINT / Ctrl+C!") |
87 sys.exit(1) | 89 sys.exit(1) |
88 | 90 |
89 | 91 |
90 ## | 92 ## Clean string by removing tabs and newlines |
91 def pkk_str_clean(mstr): | 93 def pkk_str_clean(mstr): |
92 return re.sub(r'[\n\r\t]', '', mstr) | 94 return re.sub(r'[\n\r\t]', '', mstr) |
93 | 95 |
94 | 96 |
| | 97 ## Format "Ptr" node as text |
95 def pkk_ptr_to_text(pnode): | 98 def pkk_ptr_to_text(pnode): |
96 return "PTR: <{}>{}</>".format( | 99 return "PTR: <{}>{}</>".format( |
97 pnode.attrib["{http://www.w3.org/TR/xlink}href"], | 100 pnode.attrib["{http://www.w3.org/TR/xlink}href"], |
98 ("".join(pnode.itertext())).strip()) | 101 ("".join(pnode.itertext())).strip()) |
99 | 102 |
100 | 103 |
| | 104 ## Get text inside a given node |
101 def pkk_get_text(lnode): | 105 def pkk_get_text(lnode): |
102 stmp = "" | 106 stmp = "" |
103 for pnode in lnode.iter(): | 107 for pnode in lnode.iter(): |
104 if pnode.tag == "Ptr": | 108 if pnode.tag == "Ptr": |
105 stmp += pkk_ptr_to_text(pnode) | 109 stmp += pkk_ptr_to_text(pnode) |
115 stmp += pkk_str_clean(pnode.tail) | 119 stmp += pkk_str_clean(pnode.tail) |
116 | 120 |
117 return stmp.strip() | 121 return stmp.strip() |
118 | 122 |
119 | 123 |
120 ## | 124 ## Simple recursive dump starting at given node |
121 def pkk_dump_recursive(indent, lnode): | 125 def pkk_dump_recursive(indent, lnode): |
122 if lnode.tag in ["Example"]: | 126 if lnode.tag in ["Example"]: |
123 stmp = pkk_get_text(lnode) | 127 stmp = pkk_get_text(lnode) |
124 pkk_printi(indent, "{} \"{}\"\n".format(lnode.tag, stmp)) | 128 pkk_printi(indent, "{} \"{}\"\n".format(lnode.tag, stmp)) |
125 else: | 129 else: |
138 pkk_printi(indent, "{}{}{}\n".format(lnode.tag, atmp, stmp)) | 142 pkk_printi(indent, "{}{}{}\n".format(lnode.tag, atmp, stmp)) |
139 for qnode in lnode.findall("./*"): | 143 for qnode in lnode.findall("./*"): |
140 pkk_dump_recursive(indent + 1, qnode) | 144 pkk_dump_recursive(indent + 1, qnode) |
141 | 145 |
142 | 146 |
143 ## | 147 ## Output item under given node |
144 def pkk_output_one(indent, dnode, dsub, dfmt): | 148 def pkk_output_subs_fmt(indent, dnode, dsub, dname, dfmt): |
145 for qnode in dnode.findall(dsub): | 149 for qnode in dnode.findall(dsub): |
146 pkk_printi(indent, dfmt.format(pkk_get_text(qnode))) | 150 pkk_printi(indent, dfmt.format(dname, pkk_get_text(qnode))) |
147 | 151 |
148 | 152 |
149 def pkk_output_subs(indent, dnode, dsub, dname): | 153 def pkk_output_subs_prefix(indent, dnode, dsub, dname): |
150 for qnode in dnode.findall(dsub): | 154 pkk_output_subs_fmt(indent, dnode, dsub, dname, "{0} \"{1}\"\n") |
151 pkk_printi(indent, "{} \"{}\"\n".format(dname, pkk_get_text(qnode))) | |
152 | 155 |
153 | 156 |
154 def pkk_output_sense(indent, dnode): | 157 def pkk_output_sense(indent, dnode): |
155 pkk_output_subs(indent, dnode, "./SearchForm", "srch") | 158 pkk_output_subs_prefix(indent, dnode, "./SearchForm", "srch") |
156 pkk_output_subs(indent, dnode, "./Definition", "defn") | 159 pkk_output_subs_prefix(indent, dnode, "./Definition", "defn") |
157 | 160 |
158 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): | 161 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): |
159 sstr = pkk_get_text(wnode.find("./Example")) | 162 sstr = pkk_get_text(wnode.find("./Example")) |
160 lstr = "" | 163 lstr = "" |
161 | 164 |
171 | 174 |
172 | 175 |
173 def pkk_output_node(indent, dnode): | 176 def pkk_output_node(indent, dnode): |
174 | 177 |
175 for wnode in dnode.findall("./HeadwordCtn"): | 178 for wnode in dnode.findall("./HeadwordCtn"): |
176 pkk_output_one (indent, wnode, "./Headword", "\"{}\":\n") | 179 pkk_output_subs_fmt(indent, wnode, "./Headword", "", "\"{1}\":\n") |
177 pkk_output_sense(indent + 1, wnode) | 180 pkk_output_sense(indent + 1, wnode) |
178 | 181 |
179 index = 1 | 182 index = 1 |
180 for wnode in dnode.findall("./SenseGrp"): | 183 for wnode in dnode.findall("./SenseGrp"): |
181 pkk_printi(indent + 1, "sense #{}\n".format(index)) | 184 pkk_printi(indent + 1, "sense #{}\n".format(index)) |