comparison lxmldump.py @ 28:3442b8700da7

Comments.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 25 May 2021 11:42:44 +0300
parents d77ab8a300b1
children f91ef7d7615b
comparison
equal deleted inserted replaced
27:d77ab8a300b1 28:3442b8700da7
36 PKK_MODE_NORMAL: ["<", ">"], 36 PKK_MODE_NORMAL: ["<", ">"],
37 }, 37 },
38 } 38 }
39 39
40 40
41 # List of words in kks1/ useful for debugging, option -p
41 pkk_debug_list = [ 42 pkk_debug_list = [
42 "ahas", 43 "ahas",
43 "ahavakkaine", 44 "ahavakkaine",
44 "ahavakala", 45 "ahavakala",
45 "ahavakoittuo", 46 "ahavakoittuo",
86 pkk_cleanup() 87 pkk_cleanup()
87 print(u"\nQuitting due to SIGINT / Ctrl+C!") 88 print(u"\nQuitting due to SIGINT / Ctrl+C!")
88 sys.exit(1) 89 sys.exit(1)
89 90
90 91
91 ## Annotate string 92 ## Annotate given string with prefix and suffix based on tag
92 def pkk_str_annotate(mtag, mstr): 93 def pkk_str_annotate(mtag, mstr):
93 if pkk_cfg.annotate and mtag in pkk_element_annotation_map: 94 if pkk_cfg.annotate and mtag in pkk_element_annotation_map:
94 if pkk_cfg.mode in pkk_element_annotation_map[mtag]: 95 if pkk_cfg.mode in pkk_element_annotation_map[mtag]:
95 mmode = pkk_cfg.mode 96 mmode = pkk_cfg.mode
96 else: 97 else:
104 ## Clean string by removing tabs and newlines 105 ## Clean string by removing tabs and newlines
105 def pkk_str_clean(mstr): 106 def pkk_str_clean(mstr):
106 return re.sub(r'[\n\r\t]', '', mstr) 107 return re.sub(r'[\n\r\t]', '', mstr)
107 108
108 109
109 ## Format "Ptr" node as text 110 ## Format a "Ptr" node as text
110 def pkk_ptr_to_text(pnode): 111 def pkk_ptr_to_text(pnode):
111 phref = pnode.attrib["{http://www.w3.org/TR/xlink}href"] 112 phref = pnode.attrib["{http://www.w3.org/TR/xlink}href"]
112 ptext = ("".join(pnode.itertext())).strip() 113 ptext = ("".join(pnode.itertext())).strip()
113 114
114 return f"<PTR:{phref}>{ptext}</PTR>" 115 return f"<PTR:{phref}>{ptext}</PTR>"