# HG changeset patch
# User Matti Hamalainen
# Date 1431945456 -10800
# Node ID d8b957796121e2bae157a36ae67d9b39593bd21b
# Parent  a812bd6d752fabd0d773e259721d7ab29971c142
feeds: Refactor the feeds fetching.

diff -r a812bd6d752f -r d8b957796121 fetch_feeds.tcl
--- a/fetch_feeds.tcl	Fri May 15 09:25:35 2015 +0300
+++ b/fetch_feeds.tcl	Mon May 18 13:37:36 2015 +0300
@@ -41,18 +41,57 @@
 ##############################################################################
 
 
+proc fetch_dorequest { urlStr urlStatus urlSCode urlCode urlData urlMeta } {
+    upvar 1 $urlStatus ustatus
+    upvar 1 $urlSCode uscode
+    upvar 1 $urlCode ucode
+    upvar 1 $urlData udata
+    upvar 1 $urlMeta umeta
+
+    if {[catch {set utoken [::http::geturl $urlStr -timeout 6000 -binary 1 -headers {Accept-Encoding identity}]} uerrmsg]} {
+        puts "HTTP request failed: $uerrmsg"
+        return 0
+    }
+
+    set ustatus [::http::status $utoken]
+    if {$ustatus == "timeout"} {
+        puts "HTTP request timed out ($urlStr)"
+        return 0
+    }
+
+    if {$ustatus != "ok"} {
+        puts "Error in HTTP transaction: [::http::error $utoken] ($urlStr)"
+        return 0
+    }
+
+    set ustatus [::http::status $utoken]
+    set uscode [::http::code $utoken]
+    set ucode [::http::ncode $utoken]
+    set udata [::http::data $utoken]
+    array set umeta [::http::meta $utoken]
+    ::http::cleanup $utoken
+
+    return 1
+}
+
+
 proc add_entry {uname uprefix uurl utitle} {
     global currclock feeds_db nitems
     set utmp [utl_convert_html_ent $uurl]
     if {[string match "http://*" $utmp] || [string match "https://*" $utmp]} {
         set utest "$utmp"
     } else {
-        set utest "$uprefix$utmp"
+        if {[string range $uprefix end end] != "/" && [string range $utmp 0 0] != "/"} {
+            set utest "$uprefix/$utmp"
+        } else {
+            set utest "$uprefix$utmp"
+        }
     }
 
     set usql "SELECT title FROM feeds WHERE url='[utl_escape $utest]' AND feed='[utl_escape $uname]'"
     if {![feeds_db exists $usql]} {
-        set usql "INSERT INTO feeds (feed,utime,url,title) VALUES ('[utl_escape $uname]', $currclock, '[utl_escape $utest]', '[utl_escape $utitle]')"
+#        puts "NEW: $utest : $utitle"
+        set usql "INSERT INTO feeds (feed,utime,url,title) VALUES ('[utl_escape $uname]', $currclock, '[utl_escape $utest]', '[utl_escape [utl_convert_html_ent $utitle]]')"
         incr nitems
         if {[catch {feeds_db eval $usql} uerrmsg]} {
             puts "\nError: $uerrmsg on:\n$usql"
@@ -63,7 +102,7 @@
 
 
 proc add_rss_feed {datauri dataname dataprefix} {
-    if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
+    if {[catch {set utoken [::http::geturl $datauri -binary 1 -timeout 6000 -headers {Accept-Encoding identity}]} uerrmsg]} {
         puts "Error getting $datauri: $uerrmsg"
         return 1
     }
@@ -96,26 +135,15 @@
 }
 
 
-### Open database, etc
-set nitems 0
-set currclock [clock seconds]
-global feeds_db
-if {[catch {sqlite3 feeds_db $feeds_dbfile} uerrmsg]} {
-    puts "Could not open SQLite3 database '$feeds_dbfile': $uerrmsg."
-    exit 2
-}
-
-
 ##############################################################################
 ### Fetch and parse Halla-aho's blog page data
-set datauri "http://www.halla-aho.com/scripta/";
-set dataname "Mestari"
-if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
-    puts "Error getting $datauri: $uerrmsg"
-} else {
-    set upage [::http::data $utoken]
-    ::http::cleanup $utoken
-
+proc fetch_halla_aho { } {
+    set datauri "http://www.halla-aho.com/scripta/";
+    set dataname "Mestari"
+    if {![fetch_dorequest $datauri ustatus uscode ucode upage umeta]} {
+        return 0
+    }
+
     set umatches [regexp -all -nocase -inline -- "(\[^<\]+)" $upage]
     set nmatches [llength $umatches]
     for {set n 0} {$n < $nmatches} {incr n 3} {
@@ -131,14 +159,13 @@
 
 
 ### The Adventurers
-set datauri "http://www.peldor.com/chapters/index_sidebar.html";
-set dataname "The Adventurers"
-if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
-    puts "Error getting $datauri: $uerrmsg"
-} else {
-    set upage [::http::data $utoken]
-    ::http::cleanup $utoken
-
+proc fetch_adventurers { } {
+    set datauri "http://www.peldor.com/chapters/index_sidebar.html";
+    set dataname "The Adventurers"
+    if {![fetch_dorequest $datauri ustatus uscode ucode upage umeta]} {
+        return 0
+    }
+
     set umatches [regexp -all -nocase -inline -- "(\[^<\]+)" $upage]
     set nmatches [llength $umatches]
     for {set n 0} {$n < $nmatches} {incr n 3} {
@@ -148,14 +175,13 @@
 
 
 ### Order of the Stick
-set datauri "http://www.giantitp.com/comics/oots.html";
-set dataname "OOTS"
-if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
-    puts "Error getting $datauri: $uerrmsg"
-} else {
-    set upage [::http::data $utoken]
-    ::http::cleanup $utoken
-
+proc fetch_oots { } {
+    set datauri "http://www.giantitp.com/comics/oots.html";
+    set dataname "OOTS"
+    if {![fetch_dorequest $datauri ustatus uscode ucode upage umeta]} {
+        return 0
+    }
+
     set umatches [regexp -all -nocase -inline -- "(\[^<\]+)" $upage]
     set nmatches [llength $umatches]
     for {set n 0} {$n < $nmatches} {incr n 3} {
@@ -164,19 +190,34 @@
 }
 
 
-### Generic RSS-feed fetching
+### Open database, etc
+set nitems 0
+set currclock [clock seconds]
+global feeds_db
+if {[catch {sqlite3 feeds_db $feeds_dbfile} uerrmsg]} {
+    puts "Could not open SQLite3 database '$feeds_dbfile': $uerrmsg."
+    exit 2
+}
+
+
+### Fetch the feeds
+fetch_halla_aho
+
+fetch_adventurers
+
+fetch_oots
+
 #add_rss_feed "http://www.kaleva.fi/rss/145.xml" "Kaleva/Tiede" ""
 add_rss_feed "http://www.effi.org/xml/uutiset.rss" "EFFI" ""
 add_rss_feed "http://static.mtv3.fi/rss/uutiset_rikos.rss" "MTV3/Rikos" ""
-add_rss_feed "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" ""
+#add_rss_feed "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" ""
 #add_rss_feed "http://lehti.samizdat.info/feed/" "Lehti" ""
-
 
 ### Close database
 feeds_db close
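

Usage sketch (illustrative, not part of the changeset): the new fetch_dorequest
helper returns 1 on success and reports its results through the caller's
variables via upvar, which is how the fetch_halla_aho, fetch_adventurers and
fetch_oots procs above consume it. The proc name fetch_example and the URL
below are hypothetical; fetch_dorequest and add_entry are assumed to be
available from fetch_feeds.tcl.

    package require http

    proc fetch_example { } {
        set datauri "http://example.com/news/index.html"
        set dataname "Example"

        # On failure fetch_dorequest has already printed a diagnostic,
        # so the caller only needs to bail out.
        if {![fetch_dorequest $datauri ustatus uscode ucode upage umeta]} {
            return 0
        }

        # On success $upage holds the page body (fetched with -binary 1),
        # $ucode the numeric HTTP status and $umeta the response headers;
        # the page can then be scraped and fed to add_entry in the same way
        # as the site-specific procs in this patch.
        puts "Fetched $datauri: $uscode ([string length $upage] bytes)"
        return 1
    }

Passing the result variables by name keeps the helper free of global state and
lets each site-specific proc hold its own upage/ucode copies, which is the
pattern this refactoring introduces.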