changeset 321:d8b957796121

feeds: Refactor the feeds fetching.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 18 May 2015 13:37:36 +0300
parents a812bd6d752f
children b4adc56446f6
files fetch_feeds.tcl
diffstat 1 files changed, 81 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/fetch_feeds.tcl	Fri May 15 09:25:35 2015 +0300
+++ b/fetch_feeds.tcl	Mon May 18 13:37:36 2015 +0300
@@ -41,18 +41,57 @@
 
 ##############################################################################
 
+proc fetch_dorequest { urlStr urlStatus urlSCode urlCode urlData urlMeta } {
+  upvar 1 $urlStatus ustatus
+  upvar 1 $urlSCode uscode
+  upvar 1 $urlCode ucode
+  upvar 1 $urlData udata
+  upvar 1 $urlMeta umeta
+
+  if {[catch {set utoken [::http::geturl $urlStr -timeout 6000 -binary 1 -headers {Accept-Encoding identity}]} uerrmsg]} {
+    puts "HTTP request failed: $uerrmsg"
+    return 0
+  }
+
+  set ustatus [::http::status $utoken]
+  if {$ustatus == "timeout"} {
+    puts "HTTP request timed out ($urlStr)"
+    return 0
+  }
+
+  if {$ustatus != "ok"} {
+    puts "Error in HTTP transaction: [::http::error $utoken] ($urlStr)"
+    return 0
+  }
+
+  set ustatus [::http::status $utoken]
+  set uscode [::http::code $utoken]
+  set ucode [::http::ncode $utoken]
+  set udata [::http::data $utoken]
+  array set umeta [::http::meta $utoken]
+  ::http::cleanup $utoken
+
+  return 1
+}
+
+
 proc add_entry {uname uprefix uurl utitle} {
   global currclock feeds_db nitems
   set utmp [utl_convert_html_ent $uurl]
   if {[string match "http://*" $utmp] || [string match "https://*" $utmp]} {
     set utest "$utmp"
   } else {
-    set utest "$uprefix$utmp"
+    if {[string range $uprefix end end] != "/" && [string range $utmp 0 0] != "/"} {
+      set utest "$uprefix/$utmp"
+    } else {
+      set utest "$uprefix$utmp"
+    }
   }
 
   set usql "SELECT title FROM feeds WHERE url='[utl_escape $utest]' AND feed='[utl_escape $uname]'"
   if {![feeds_db exists $usql]} {
-    set usql "INSERT INTO feeds (feed,utime,url,title) VALUES ('[utl_escape $uname]', $currclock, '[utl_escape $utest]', '[utl_escape $utitle]')"
+#    puts "NEW: $utest : $utitle"
+    set usql "INSERT INTO feeds (feed,utime,url,title) VALUES ('[utl_escape $uname]', $currclock, '[utl_escape $utest]', '[utl_escape [utl_convert_html_ent $utitle]]')"
     incr nitems
     if {[catch {feeds_db eval $usql} uerrmsg]} {
       puts "\nError: $uerrmsg on:\n$usql"
@@ -63,7 +102,7 @@
 
 
 proc add_rss_feed {datauri dataname dataprefix} {
-  if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
+  if {[catch {set utoken [::http::geturl $datauri -binary 1 -timeout 6000 -headers {Accept-Encoding identity}]} uerrmsg]} {
     puts "Error getting $datauri: $uerrmsg"
     return 1
   }
@@ -96,26 +135,15 @@
 }
 
 
-### Open database, etc
-set nitems 0
-set currclock [clock seconds]
-global feeds_db
-if {[catch {sqlite3 feeds_db $feeds_dbfile} uerrmsg]} {
-  puts "Could not open SQLite3 database '$feeds_dbfile': $uerrmsg."
-  exit 2
-}
-
-
 ##############################################################################
 ### Fetch and parse Halla-aho's blog page data
-set datauri "http://www.halla-aho.com/scripta/";
-set dataname "Mestari"
-if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
-  puts "Error getting $datauri: $uerrmsg"
-} else {
-  set upage [::http::data $utoken]
-  ::http::cleanup $utoken
-  
+proc fetch_halla_aho { } {
+  set datauri "http://www.halla-aho.com/scripta/";
+  set dataname "Mestari"
+  if {![fetch_dorequest $datauri ustatus uscode ucode upage umeta]} {
+    return 0
+  }
+
   set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\"><b>(\[^<\]+)</b>" $upage]
   set nmatches [llength $umatches]
   for {set n 0} {$n < $nmatches} {incr n 3} {
@@ -131,14 +159,13 @@
 
 
 ### The Adventurers
-set datauri "http://www.peldor.com/chapters/index_sidebar.html";
-set dataname "The Adventurers"
-if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
-  puts "Error getting $datauri: $uerrmsg"
-} else {
-  set upage [::http::data $utoken]
-  ::http::cleanup $utoken
-  
+proc fetch_adventurers { } {
+  set datauri "http://www.peldor.com/chapters/index_sidebar.html";
+  set dataname "The Adventurers"
+  if {![fetch_dorequest $datauri ustatus uscode ucode upage umeta]} {
+    return 0
+  }
+
   set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+)\">(\[^<\]+)</a>" $upage]
   set nmatches [llength $umatches]
   for {set n 0} {$n < $nmatches} {incr n 3} {
@@ -148,14 +175,13 @@
 
 
 ### Order of the Stick
-set datauri "http://www.giantitp.com/comics/oots.html";
-set dataname "OOTS"
-if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
-  puts "Error getting $datauri: $uerrmsg"
-} else {
-  set upage [::http::data $utoken]
-  ::http::cleanup $utoken
-  
+proc fetch_oots { } {
+  set datauri "http://www.giantitp.com/comics/oots.html";
+  set dataname "OOTS"
+  if {![fetch_dorequest $datauri ustatus uscode ucode upage umeta]} {
+    return 0
+  }
+
   set umatches [regexp -all -nocase -inline -- "<a href=\"(/comics/oots\[0-9\]+\.html)\">(\[^<\]+)</a>" $upage]
   set nmatches [llength $umatches]
   for {set n 0} {$n < $nmatches} {incr n 3} {
@@ -164,19 +190,34 @@
 }
 
 
-### Generic RSS-feed fetching
+### Open database, etc
+set nitems 0
+set currclock [clock seconds]
+global feeds_db
+if {[catch {sqlite3 feeds_db $feeds_dbfile} uerrmsg]} {
+  puts "Could not open SQLite3 database '$feeds_dbfile': $uerrmsg."
+  exit 2
+}
+
+
+### Fetch the feeds
+fetch_halla_aho
+
+fetch_adventurers
+
+fetch_oots
+
 #add_rss_feed "http://www.kaleva.fi/rss/145.xml" "Kaleva/Tiede" ""
 
 add_rss_feed "http://www.effi.org/xml/uutiset.rss" "EFFI" ""
 
 add_rss_feed "http://static.mtv3.fi/rss/uutiset_rikos.rss" "MTV3/Rikos" ""
 
-add_rss_feed "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" ""
+#add_rss_feed "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" ""
 
 #add_rss_feed "http://lehti.samizdat.info/feed/" "Lehti" ""
 
 
-
 ### Close database
 feeds_db close