# HG changeset patch # User Matti Hamalainen # Date 1370437216 -10800 # Node ID 7106dd8db4de92112af40e0fc409a4bcb161fde4 # Parent c94b4e1a2ed414d6cebff67b13d23cd98f661e5a Improve entity parsing, etc. diff -r c94b4e1a2ed4 -r 7106dd8db4de feeds.tcl --- a/feeds.tcl Wed Jun 05 15:51:30 2013 +0300 +++ b/feeds.tcl Wed Jun 05 16:00:16 2013 +0300 @@ -118,7 +118,7 @@ proc feeds_exec {} { global feeds_dbh feeds_check_period - feeds_log "Timed feed check." +# feeds_log "Timed feed check." set oldtime [feeds_check_start] set found 0 @@ -136,6 +136,7 @@ timer $feeds_check_period feeds_exec } + if {[info exists feeds_running]} { set feeds_last [expr [clock seconds] - $feeds_running] } else { diff -r c94b4e1a2ed4 -r 7106dd8db4de get_feeds.tcl --- a/get_feeds.tcl Wed Jun 05 15:51:30 2013 +0300 +++ b/get_feeds.tcl Wed Jun 05 16:00:16 2013 +0300 @@ -26,7 +26,14 @@ ########################################################################## -set html_ent [split " | |»|>>|"|\"|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>|ä|ä|ö|ö|Ä|Ä" "|"] +set feeds_ent_str "-|-|'|'|—|-|‏||—|-|–|--|‪||‬|" +append feeds_ent_str "|‎||å|Ã¥|Å|Ã…|é|é|:|:| | " +append feeds_ent_str "|”|\"|“|\"|«|<<|»|>>|"|\"" +append feeds_ent_str "|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>" +append feeds_ent_str "|ä|ä|å|ö|—|-|'|'|–|-|"|\"" +append feeds_ent_str "|||-|’|'|ü|ü|Ü|Ãœ|•|*|€|€" +append feeds_ent_str "|”|\"" +set html_ent [split [encoding convertfrom "utf-8" $feeds_ent_str] "|"] package require http ::http::config -urlencoding iso8859-1 -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5" @@ -37,10 +44,9 @@ proc convert_ent {udata} { global html_ent - return [string map $html_ent $udata] + return [string map -nocase $html_ent [string map $html_ent $udata]] } - proc add_entry {uname uprefix uurl utitle} { global currclock feeds_db nitems set utest "$uprefix[convert_ent $uurl]"