comparison lxmldump.py @ 16:285b0820d2c6

Cleanup.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 11 May 2021 14:47:01 +0300
parents 2f86537ff1f7
children 6fa24c711f86
comparison
equal deleted inserted replaced
15:2f86537ff1f7 16:285b0820d2c6
91 return re.sub(r'[\n\r\t]', '', mstr) 91 return re.sub(r'[\n\r\t]', '', mstr)
92 92
93 93
94 ## Format "Ptr" node as text 94 ## Format "Ptr" node as text
95 def pkk_ptr_to_text(pnode): 95 def pkk_ptr_to_text(pnode):
96 return "PTR: <{}>{}</>".format( 96 return "<PTR:{}>{}</PTR>".format(
97 pnode.attrib["{http://www.w3.org/TR/xlink}href"], 97 pnode.attrib["{http://www.w3.org/TR/xlink}href"],
98 ("".join(pnode.itertext())).strip()) 98 ("".join(pnode.itertext())).strip())
99 99
100 100
101 ## Get text inside a given node 101 ## Get text inside a given node
102 def pkk_get_text(lnode): 102 def pkk_node_to_text(lnode):
103 stmp = "" 103 stmp = ""
104 for pnode in lnode.iter(): 104 for pnode in lnode.iter():
105 if pnode.tag == "Ptr": 105 if pnode.tag == "Ptr":
106 stmp += pkk_ptr_to_text(pnode) 106 stmp += pkk_ptr_to_text(pnode)
107 else: 107 else:
119 119
120 120
121 ## Simple recursive dump starting at given node 121 ## Simple recursive dump starting at given node
122 def pkk_dump_recursive(indent, lnode): 122 def pkk_dump_recursive(indent, lnode):
123 if lnode.tag in ["Example"]: 123 if lnode.tag in ["Example"]:
124 stmp = pkk_get_text(lnode) 124 stmp = pkk_node_to_text(lnode)
125 pkk_printi(indent, "{} \"{}\"\n".format(lnode.tag, stmp)) 125 pkk_printi(indent, "{} \"{}\"\n".format(lnode.tag, stmp))
126 else: 126 else:
127 if isinstance(lnode.text, str): 127 if isinstance(lnode.text, str):
128 stmp = pkk_str_clean(lnode.text).strip() 128 stmp = pkk_str_clean(lnode.text).strip()
129 if stmp != "": 129 if stmp != "":
142 142
143 143
144 ## Output item under given node 144 ## Output item under given node
145 def pkk_output_subs_fmt(indent, dnode, dsub, dname, dfmt): 145 def pkk_output_subs_fmt(indent, dnode, dsub, dname, dfmt):
146 for qnode in dnode.findall(dsub): 146 for qnode in dnode.findall(dsub):
147 pkk_printi(indent, dfmt.format(dname, pkk_get_text(qnode))) 147 pkk_printi(indent, dfmt.format(dname, pkk_node_to_text(qnode)))
148 148
149 149
150 def pkk_output_subs_prefix(indent, dnode, dsub, dname): 150 def pkk_output_subs_prefix(indent, dnode, dsub, dname):
151 pkk_output_subs_fmt(indent, dnode, dsub, dname, "{0} \"{1}\"\n") 151 pkk_output_subs_fmt(indent, dnode, dsub, dname, "{0} \"{1}\"\n")
152 152
154 def pkk_output_sense(indent, dnode): 154 def pkk_output_sense(indent, dnode):
155 pkk_output_subs_prefix(indent, dnode, "./SearchForm", "srch") 155 pkk_output_subs_prefix(indent, dnode, "./SearchForm", "srch")
156 pkk_output_subs_prefix(indent, dnode, "./Definition", "defn") 156 pkk_output_subs_prefix(indent, dnode, "./Definition", "defn")
157 157
158 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"): 158 for wnode in dnode.findall("./ExampleBlock/ExampleCtn"):
159 sstr = pkk_get_text(wnode.find("./Example")) 159 sstr = pkk_node_to_text(wnode.find("./Example"))
160 lstr = "" 160 lstr = ""
161 161
162 if pkk_verbosity(1): 162 if pkk_verbosity(1):
163 ltmp = [] 163 ltmp = []
164 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"): 164 for qnode in wnode.findall("./FreeTopic[@type='levikki']/GeographicalUsage"):
165 ltmp.append("{} [{}]".format(pkk_get_text(qnode), qnode.attrib["class"])) 165 ltmp.append("{} [{}]".format(pkk_node_to_text(qnode), qnode.attrib["class"]))
166 166
167 if len(ltmp) > 0: 167 if len(ltmp) > 0:
168 lstr = " ({})".format(", ".join(ltmp)) 168 lstr = " ({})".format(", ".join(ltmp))
169 169
170 pkk_printi(indent + 1, "{} \"{}\"{}\n".format("exmp", sstr, lstr)) 170 pkk_printi(indent + 1, "{} \"{}\"{}\n".format("exmp", sstr, lstr))