comparison lxmldump.py @ 6:34a89d61dbe7

Merge and cleanup.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 10 May 2021 21:43:00 +0300
parents 274b2091137c 7ce08dea935b
children 4b4299b62f7f
comparison
equal deleted inserted replaced
5:274b2091137c 6:34a89d61dbe7
52 pkk_cleanup() 52 pkk_cleanup()
53 print(u"\nQuitting due to SIGINT / Ctrl+C!") 53 print(u"\nQuitting due to SIGINT / Ctrl+C!")
54 sys.exit(1) 54 sys.exit(1)
55 55
56 56
57 ##
def pkk_dump_recursive(lnode, indent):
    """Print `lnode` and its descendants as an indented tree.

    Each node is emitted on its own line, prefixed by `indent` spaces, as
    the tag name followed by the attribute mapping (only when non-empty)
    and the stripped node text in double quotes (only when non-empty).

    "Example" elements are handled specially: all text found by
    `itertext()` is concatenated into one quoted string and the children
    are not visited separately.

    Args:
        lnode: Element-like object providing `tag`, `text`, `attrib`,
            `itertext()` and `findall()`.
        indent: Number of spaces to prefix the line with; child elements
            are printed with `indent + 1`.
    """
    pad = " " * indent

    if lnode.tag == "Example":
        # Flatten the text of the whole subtree into a single string.
        stmp = "".join(lnode.itertext()).strip()
        # NOTE(review): this branch writes through print() while the
        # generic branch below uses pkk_print() -- confirm this is intended.
        print("{}{} \"{}\"".format(pad, lnode.tag, stmp))
        return

    # Quoted node text, or nothing when the text is missing or blank.
    stmp = ""
    if lnode.text is not None:
        tmp = str(lnode.text).strip()
        if tmp:
            stmp = " \"" + tmp + "\""

    # Attributes are shown only when the element actually has some.
    if len(lnode.attrib) > 0:
        atmp = " " + str(lnode.attrib)
    else:
        atmp = ""

    # The newline is passed explicitly as part of the string.
    pkk_print("{}{}{}{}\n".format(pad, lnode.tag, atmp, stmp))

    # Recurse into the direct children, one indent level deeper.
    for qnode in lnode.findall("./*"):
        pkk_dump_recursive(qnode, indent + 1)
77
78
79 ##
def pkk_output_node(dnode):
    """Print one dictionary entry as "<headwords> : <definitions>".

    Headwords are the texts of the `SearchForm` children of every
    `HeadwordCtn` child of `dnode`, joined with ", ". Definitions are the
    texts of the `Definition` children of every `HeadwordCtn` child
    followed by those of every `SenseGrp` child, joined with " ; ".

    Args:
        dnode: Element-like object providing `findall()`; the matched
            elements must provide a `text` attribute.
    """
    def node_text(node):
        # `text` is None when an element has no leading text; calling
        # str() on that would emit the literal word "None".
        return ("" if node.text is None else str(node.text)).strip()

    wlist = []
    dlist = []

    for wnode in dnode.findall("./HeadwordCtn"):
        wlist.extend(node_text(q) for q in wnode.findall("./SearchForm"))
        dlist.extend(node_text(q) for q in wnode.findall("./Definition"))

    for wnode in dnode.findall("./SenseGrp"):
        dlist.extend(node_text(q) for q in wnode.findall("./Definition"))

    # The newline is passed explicitly as part of the string.
    pkk_print("{} : {}\n".format(", ".join(wlist), " ; ".join(dlist)))
95
96
57 ### 97 ###
58 ### Main program starts 98 ### Main program starts
59 ### 99 ###
60 signal.signal(signal.SIGINT, pkk_signal_handler) 100 signal.signal(signal.SIGINT, pkk_signal_handler)
61 101
105 print(u"Usage: {0} <options> <input xml file(s)>". 145 print(u"Usage: {0} <options> <input xml file(s)>".
106 format(str(Path(sys.argv[0]).name))) 146 format(str(Path(sys.argv[0]).name)))
107 print(u"") 147 print(u"")
108 print(u" --help Show this help") 148 print(u" --help Show this help")
109 print(u" -d, --dump Dump mode") 149 print(u" -d, --dump Dump mode")
150 print(u" -n, --normalize Output NFC normalized Unicode")
110 print(u"") 151 print(u"")
111 sys.exit(0) 152 sys.exit(0)
112 153
113 154
114 ### 155 ### Handle each input file
115 ### Main
116 ###
def pkk_dump_simple_node(lnode, indent):
    """Print `lnode` and its descendants as an indented tree.

    Each node is passed to `pkk_print` as `indent` spaces, the tag name,
    the attribute mapping (always included, even when empty) and, when the
    stripped node text is non-empty, that text in double quotes.

    Args:
        lnode: Element-like object providing `tag`, `text`, `attrib` and
            `findall()`.
        indent: Number of spaces to prefix the line with; child elements
            are printed with `indent + 1`.
    """
    # Quoted node text, or nothing when the text is missing or blank.
    stmp = ""
    if lnode.text is not None:
        tmp = str(lnode.text).strip()
        if tmp:
            stmp = " \"" + tmp + "\""

    pkk_print("{}{} {}{}".format(" " * indent, lnode.tag, lnode.attrib, stmp))

    # Recurse into the direct children, one indent level deeper.
    for qnode in lnode.findall("./*"):
        pkk_dump_simple_node(qnode, indent + 1)
127
128
def pkk_dump_node(dnode):
    """Print one dictionary entry as "<headwords> : <definitions>".

    Headwords are the texts of the `SearchForm` children of every
    `HeadwordCtn` child of `dnode`, joined with ", ". Definitions are the
    texts of the `Definition` children of every `HeadwordCtn` child
    followed by those of every `SenseGrp` child, joined with " ; ".

    Args:
        dnode: Element-like object providing `findall()`; the matched
            elements must provide a `text` attribute.
    """
    def node_text(node):
        # `text` is None when an element has no leading text; calling
        # str() on that would emit the literal word "None".
        return ("" if node.text is None else str(node.text)).strip()

    wlist = []
    dlist = []

    for wnode in dnode.findall("./HeadwordCtn"):
        wlist.extend(node_text(q) for q in wnode.findall("./SearchForm"))
        dlist.extend(node_text(q) for q in wnode.findall("./Definition"))

    for wnode in dnode.findall("./SenseGrp"):
        dlist.extend(node_text(q) for q in wnode.findall("./Definition"))

    pkk_print("{} : {}".format(", ".join(wlist), " ; ".join(dlist)))
144
145
146 for filename in pkk_filenames: 156 for filename in pkk_filenames:
147 # Parse XML file into element tree 157 # Parse XML file into element tree
148 try: 158 try:
149 uxml = xmlET.parse(filename) 159 uxml = xmlET.parse(filename)
150 except Exception as e: 160 except Exception as e:
153 # Dump output 163 # Dump output
154 try: 164 try:
155 xroot = uxml.getroot() 165 xroot = uxml.getroot()
156 for dnode in xroot.findall("./DictionaryEntry"): 166 for dnode in xroot.findall("./DictionaryEntry"):
157 if pkk_cfg["dump"]: 167 if pkk_cfg["dump"]:
158 pkk_dump_simple_node(dnode, 0) 168 pkk_dump_recursive(dnode, 0)
159 print("\n\n") 169 print("\n\n")
160 else: 170 else:
161 pkk_dump_node(dnode) 171 pkk_output_node(dnode)
162 172
163 except (BrokenPipeError, IOError) as e: 173 except (BrokenPipeError, IOError) as e:
164 sys.stderr.close() 174 sys.stderr.close()
165 sys.exit(1) 175 sys.exit(1)
166 176