changeset 105:c81da31e3ab3

Merged.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 13 Sep 2011 15:53:52 +0300
parents 7819b337bb67 (current diff) da337ca10e0a (diff)
children fc50d5fd6ce8
files
diffstat 1 files changed, 8 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/urllog.tcl	Mon Sep 12 19:15:17 2011 +0300
+++ b/urllog.tcl	Tue Sep 13 15:53:52 2011 +0300
@@ -34,7 +34,7 @@
 
 # Enable _experimental_ TLS/SSL support. This may not work at all.
 # If unsure, leave this option disabled (0).
-set http_tls_support 0
+set http_tls_support 1
 
 set http_tls_cadir "/usr/share/ca-certificates/mozilla"
 
@@ -140,7 +140,9 @@
 set urllog_tlds [split $urllog_tlds ","]
 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] 
 
-set urllog_html_ent [split "—|-|&rlm;||&#8212;|-|&#x202a;||&#x202c;||&lrm;||&aring;|å|&Aring;|Å|&eacute;|é|&#58;|:|&nbsp;| |&#8221;|\"|&#8220;|\"|&raquo;|>>|&quot;|\"|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>" "|"]
+
+set urllog_ent_str "&#39;|'|—|-|&rlm;||&#8212;|-|&#x202a;||&#x202c;||&lrm;||&aring;|å|&Aring;|Å|&eacute;|é|&#58;|:|&nbsp;| |&#8221;|\"|&#8220;|\"|&raquo;|>>|&quot;|\"|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>"
+set urllog_html_ent [split [encoding convertfrom "utf-8" $urllog_ent_str] "|"]
 
 ### Require packages
 package require sqlite3
@@ -248,7 +250,7 @@
 proc urllog_convert_ent {udata} {
   global urllog_html_ent
   regsub -all "  " $udata " " utmp
-  regsub -all "\r" $udata " " utmp
+  regsub -all "\r" $utmp " " utmp
   regsub -all "\n" $utmp " " utmp
   regsub -all "  *" $utmp " " utmp
   regsub -all "\t" $utmp "" utmp
@@ -477,12 +479,11 @@
   set ucode [::http::ncode $utoken]
   if {$ucode >= 200 && $ucode <= 309} {
     set udata [::http::data $utoken]
-    set umatches [regexp -nocase -inline -- "<meta.\*\?content=\".\*\?charset=(\[^\"\]*)\"/>" $udata]
     set uconvert 0
-    if {[llength $umatches] > 0} {
-      set uencoding [lindex $umatches 1]
+    if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/>" $udata umatches uencoding]} {
       if {[string length $uencoding] > 3} {
-        regsub -nocase "-" $uencoding "" uencoding
+        set uencoding [string tolower $uencoding]
+        regsub -- "iso-" $uencoding "iso" uencoding
         set uconvert 1
       }
     }