changeset 146:7106dd8db4de

Improve entity parsing, etc.
author Matti Hamalainen <ccr@tnsp.org>
date Wed, 05 Jun 2013 16:00:16 +0300
parents c94b4e1a2ed4
children 48460e925a8c
files feeds.tcl get_feeds.tcl
diffstat 2 files changed, 11 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/feeds.tcl	Wed Jun 05 15:51:30 2013 +0300
+++ b/feeds.tcl	Wed Jun 05 16:00:16 2013 +0300
@@ -118,7 +118,7 @@
 proc feeds_exec {} {
   global feeds_dbh feeds_check_period
 
-  feeds_log "Timed feed check."
+#  feeds_log "Timed feed check."
   set oldtime [feeds_check_start]
   set found 0
   
@@ -136,6 +136,7 @@
   timer $feeds_check_period feeds_exec
 }
 
+
 if {[info exists feeds_running]} {
   set feeds_last [expr [clock seconds] - $feeds_running]
 } else {
--- a/get_feeds.tcl	Wed Jun 05 15:51:30 2013 +0300
+++ b/get_feeds.tcl	Wed Jun 05 16:00:16 2013 +0300
@@ -26,7 +26,14 @@
 
 ##########################################################################
 
-set html_ent [split "&#160;| |&raquo;|>>|&quot;|\"|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>|ä|ä|ö|ö|Ä|Ä" "|"]
+set    feeds_ent_str "&#45;|-|&#39;|'|—|-|&rlm;||&#8212;|-|&#8211;|--|&#x202a;||&#x202c;|"
+append feeds_ent_str "|&lrm;||&aring;|å|&Aring;|Å|&eacute;|é|&#58;|:|&nbsp;| "
+append feeds_ent_str "|&#8221;|\"|&#8220;|\"|&laquo;|<<|&raquo;|>>|&quot;|\""
+append feeds_ent_str "|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>"
+append feeds_ent_str "|&#228;|ä|&#229;|å|&#246;|ö|&mdash;|-|&#039;|'|&ndash;|-|&#034;|\"" ;# decimal refs: 228 = ä, 229 = å, 246 = ö
+append feeds_ent_str "|&#124;|-|&#8217;|'|&uuml;|ü|&Uuml;|Ü|&bull;|*|&euro;|€"
+append feeds_ent_str "|&rdquo;|\""
+set html_ent [split [encoding convertfrom "utf-8" $feeds_ent_str] "|"] ;# flat entity/replacement list consumed by [string map] in convert_ent
 
 package require http
 ::http::config -urlencoding iso8859-1 -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5"
@@ -37,10 +44,9 @@
 
 proc convert_ent {udata} {
   global html_ent
-  return [string map $html_ent $udata]
+  return [string map -nocase $html_ent [string map $html_ent $udata]] ;# exact-case pass first, then a case-insensitive pass over that result
 }
 
-
 proc add_entry {uname uprefix uurl utitle} {
   global currclock feeds_db nitems
   set utest "$uprefix[convert_ent $uurl]"