Mercurial > hg > lxmldump
comparison lxmldump.py @ 6:34a89d61dbe7
Merge and cleanup.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 10 May 2021 21:43:00 +0300 |
parents | 274b2091137c 7ce08dea935b |
children | 4b4299b62f7f |
comparison legend (row states used in the side-by-side table below):
equal
deleted
inserted
replaced
5:274b2091137c | 6:34a89d61dbe7 |
---|---|
52 pkk_cleanup() | 52 pkk_cleanup() |
53 print(u"\nQuitting due to SIGINT / Ctrl+C!") | 53 print(u"\nQuitting due to SIGINT / Ctrl+C!") |
54 sys.exit(1) | 54 sys.exit(1) |
55 | 55 |
56 | 56 |
57 ## | |
58 def pkk_dump_recursive(lnode, indent): | |
59 if lnode.tag == "Example": | |
60 stmp = "".join(lnode.itertext()).strip() | |
61 print("{}{} \"{}\"".format(" " * indent, lnode.tag, stmp)) | |
62 else: | |
63 stmp = "" | |
64 if lnode.text != None: | |
65 tmp = str(lnode.text).strip() | |
66 if tmp != "": | |
67 stmp = " \""+ tmp +"\"" | |
68 | |
69 if len(lnode.attrib) > 0: | |
70 atmp = " "+str(lnode.attrib) | |
71 else: | |
72 atmp = "" | |
73 | |
74 pkk_print("{}{}{}{}\n".format(" " * indent, lnode.tag, atmp, stmp)) | |
75 for qnode in lnode.findall("./*"): | |
76 pkk_dump_recursive(qnode, indent + 1) | |
77 | |
78 | |
79 ## | |
80 def pkk_output_node(dnode): | |
81 wlist = [] | |
82 dlist = [] | |
83 for wnode in dnode.findall("./HeadwordCtn"): | |
84 for qnode in wnode.findall("./SearchForm"): | |
85 wlist.append(str(qnode.text).strip()) | |
86 | |
87 for qnode in wnode.findall("./Definition"): | |
88 dlist.append(str(qnode.text).strip()) | |
89 | |
90 for wnode in dnode.findall("./SenseGrp"): | |
91 for qnode in wnode.findall("./Definition"): | |
92 dlist.append(str(qnode.text).strip()) | |
93 | |
94 pkk_print("{} : {}\n".format(", ".join(wlist), " ; ".join(dlist))) | |
95 | |
96 | |
57 ### | 97 ### |
58 ### Main program starts | 98 ### Main program starts |
59 ### | 99 ### |
60 signal.signal(signal.SIGINT, pkk_signal_handler) | 100 signal.signal(signal.SIGINT, pkk_signal_handler) |
61 | 101 |
105 print(u"Usage: {0} <options> <input xml file(s)>". | 145 print(u"Usage: {0} <options> <input xml file(s)>". |
106 format(str(Path(sys.argv[0]).name))) | 146 format(str(Path(sys.argv[0]).name))) |
107 print(u"") | 147 print(u"") |
108 print(u" --help Show this help") | 148 print(u" --help Show this help") |
109 print(u" -d, --dump Dump mode") | 149 print(u" -d, --dump Dump mode") |
150 print(u" -n, --normalize Output NFC normalized Unicode") | |
110 print(u"") | 151 print(u"") |
111 sys.exit(0) | 152 sys.exit(0) |
112 | 153 |
113 | 154 |
114 ### | 155 ### Handle each input file |
115 ### Main | |
116 ### | |
117 def pkk_dump_simple_node(lnode, indent): | |
118 stmp = "" | |
119 if lnode.text != None: | |
120 tmp = str(lnode.text).strip() | |
121 if tmp != "": | |
122 stmp = " \""+ tmp +"\"" | |
123 | |
124 pkk_print("{}{} {}{}".format(" " * indent, lnode.tag, lnode.attrib, stmp)) | |
125 for qnode in lnode.findall("./*"): | |
126 pkk_dump_simple_node(qnode, indent + 1) | |
127 | |
128 | |
129 def pkk_dump_node(dnode): | |
130 wlist = [] | |
131 dlist = [] | |
132 for wnode in dnode.findall("./HeadwordCtn"): | |
133 for qnode in wnode.findall("./SearchForm"): | |
134 wlist.append(str(qnode.text).strip()) | |
135 | |
136 for qnode in wnode.findall("./Definition"): | |
137 dlist.append(str(qnode.text).strip()) | |
138 | |
139 for wnode in dnode.findall("./SenseGrp"): | |
140 for qnode in wnode.findall("./Definition"): | |
141 dlist.append(str(qnode.text).strip()) | |
142 | |
143 pkk_print("{} : {}".format(", ".join(wlist), " ; ".join(dlist))) | |
144 | |
145 | |
146 for filename in pkk_filenames: | 156 for filename in pkk_filenames: |
147 # Parse XML file into element tree | 157 # Parse XML file into element tree |
148 try: | 158 try: |
149 uxml = xmlET.parse(filename) | 159 uxml = xmlET.parse(filename) |
150 except Exception as e: | 160 except Exception as e: |
153 # Dump output | 163 # Dump output |
154 try: | 164 try: |
155 xroot = uxml.getroot() | 165 xroot = uxml.getroot() |
156 for dnode in xroot.findall("./DictionaryEntry"): | 166 for dnode in xroot.findall("./DictionaryEntry"): |
157 if pkk_cfg["dump"]: | 167 if pkk_cfg["dump"]: |
158 pkk_dump_simple_node(dnode, 0) | 168 pkk_dump_recursive(dnode, 0) |
159 print("\n\n") | 169 print("\n\n") |
160 else: | 170 else: |
161 pkk_dump_node(dnode) | 171 pkk_output_node(dnode) |
162 | 172 |
163 except (BrokenPipeError, IOError) as e: | 173 except (BrokenPipeError, IOError) as e: |
164 sys.stderr.close() | 174 sys.stderr.close() |
165 sys.exit(1) | 175 sys.exit(1) |
166 | 176 |