Mercurial > hg > egg-tcls
changeset 101:372b63af72b5
urllog: Improve page character set encoding detection/guessing.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 13 Sep 2011 15:49:43 +0300 |
parents | 8139169293f9 |
children | 5425dc418505 |
files | urllog.tcl |
diffstat | 1 files changed, 3 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/urllog.tcl Mon Sep 12 19:14:14 2011 +0300
+++ b/urllog.tcl Tue Sep 13 15:49:43 2011 +0300
@@ -477,12 +477,11 @@
     set ucode [::http::ncode $utoken]
     if {$ucode >= 200 && $ucode <= 309} {
       set udata [::http::data $utoken]
-      set umatches [regexp -nocase -inline -- "<meta.\*\?content=\".\*\?charset=(\[^\"\]*)\"/>" $udata]
       set uconvert 0
-      if {[llength $umatches] > 0} {
-        set uencoding [lindex $umatches 1]
+      if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/>" $udata umatches uencoding]} {
         if {[string length $uencoding] > 3} {
-          regsub -nocase "-" $uencoding "" uencoding
+          set uencoding [string tolower $uencoding]
+          regsub -- "iso-" $uencoding "iso" uencoding
           set uconvert 1
         }
       }
```