Mercurial > hg > lxmldump
comparison lxmldump.py @ 28:3442b8700da7
Comments.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 25 May 2021 11:42:44 +0300 |
parents | d77ab8a300b1 |
children | f91ef7d7615b |
comparison
equal
deleted
inserted
replaced
27:d77ab8a300b1 | 28:3442b8700da7 |
---|---|
36 PKK_MODE_NORMAL: ["<", ">"], | 36 PKK_MODE_NORMAL: ["<", ">"], |
37 }, | 37 }, |
38 } | 38 } |
39 | 39 |
40 | 40 |
 | 41 # List of words in kks1/ useful for debugging, option -p |
41 pkk_debug_list = [ | 42 pkk_debug_list = [ |
42 "ahas", | 43 "ahas", |
43 "ahavakkaine", | 44 "ahavakkaine", |
44 "ahavakala", | 45 "ahavakala", |
45 "ahavakoittuo", | 46 "ahavakoittuo", |
86 pkk_cleanup() | 87 pkk_cleanup() |
87 print(u"\nQuitting due to SIGINT / Ctrl+C!") | 88 print(u"\nQuitting due to SIGINT / Ctrl+C!") |
88 sys.exit(1) | 89 sys.exit(1) |
89 | 90 |
90 | 91 |
91 ## Annotate string | 92 ## Annotate given string with prefix and suffix based on tag |
92 def pkk_str_annotate(mtag, mstr): | 93 def pkk_str_annotate(mtag, mstr): |
93 if pkk_cfg.annotate and mtag in pkk_element_annotation_map: | 94 if pkk_cfg.annotate and mtag in pkk_element_annotation_map: |
94 if pkk_cfg.mode in pkk_element_annotation_map[mtag]: | 95 if pkk_cfg.mode in pkk_element_annotation_map[mtag]: |
95 mmode = pkk_cfg.mode | 96 mmode = pkk_cfg.mode |
96 else: | 97 else: |
104 ## Clean string by removing tabs and newlines | 105 ## Clean string by removing tabs and newlines |
105 def pkk_str_clean(mstr): | 106 def pkk_str_clean(mstr): |
106 return re.sub(r'[\n\r\t]', '', mstr) | 107 return re.sub(r'[\n\r\t]', '', mstr) |
107 | 108 |
108 | 109 |
109 ## Format "Ptr" node as text | 110 ## Format a "Ptr" node as text |
110 def pkk_ptr_to_text(pnode): | 111 def pkk_ptr_to_text(pnode): |
111 phref = pnode.attrib["{http://www.w3.org/TR/xlink}href"] | 112 phref = pnode.attrib["{http://www.w3.org/TR/xlink}href"] |
112 ptext = ("".join(pnode.itertext())).strip() | 113 ptext = ("".join(pnode.itertext())).strip() |
113 | 114 |
114 return f"<PTR:{phref}>{ptext}</PTR>" | 115 return f"<PTR:{phref}>{ptext}</PTR>" |