changeset 102:5425dc418505

urllog: Entity data is now in UTF-8, but Tcl source files are interpreted using the current system encoding (locale), which may not be UTF-8. We must therefore explicitly decode the entity mapping string from UTF-8 (via "encoding convertfrom") to be certain that Tcl interprets its encoding correctly.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 13 Sep 2011 15:51:15 +0300
parents 372b63af72b5
children 453f978f19e5
files urllog.tcl
diffstat 1 files changed, 3 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/urllog.tcl	Tue Sep 13 15:49:43 2011 +0300
+++ b/urllog.tcl	Tue Sep 13 15:51:15 2011 +0300
@@ -140,7 +140,9 @@
 set urllog_tlds [split $urllog_tlds ","]
 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] 
 
-set urllog_html_ent [split "—|-|&rlm;||&#8212;|-|&#x202a;||&#x202c;||&lrm;||&aring;|å|&Aring;|Å|&eacute;|é|&#58;|:|&nbsp;| |&#8221;|\"|&#8220;|\"|&raquo;|>>|&quot;|\"|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>" "|"]
+
+set urllog_ent_str "&#39;|'|—|-|&rlm;||&#8212;|-|&#x202a;||&#x202c;||&lrm;||&aring;|å|&Aring;|Å|&eacute;|é|&#58;|:|&nbsp;| |&#8221;|\"|&#8220;|\"|&raquo;|>>|&quot;|\"|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>"
+set urllog_html_ent [split [encoding convertfrom "utf-8" $urllog_ent_str] "|"]
 
 ### Require packages
 package require sqlite3