Mercurial > hg > egg-tcls
view get_feeds.tcl @ 156:a1f4c163e48c
weather: Add max results limit.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sun, 01 Jun 2014 18:47:36 +0300 |
parents | 48460e925a8c |
children |
line wrap: on
line source
#!/usr/bin/tclsh
#
# NOTICE! Change above path to correct tclsh binary path!
#
##########################################################################
#
# FeedCheck fetcher v0.8 by Matti 'ccr' Hamalainen <ccr@tnsp.org>
# (C) Copyright 2008-2013 Tecnic Software productions (TNSP)
#
# This script is freely distributable under GNU GPL (version 2) license.
#
##########################################################################
package require sqlite3

# Brings the [escape] helper used below into scope.
# NOTE(review): escape is defined in util_convert.tcl, which is not
# visible here -- presumably it quotes single quotes for SQL; verify.
source [file dirname [info script]]/util_convert.tcl

# SQLite3 database, MUST be set to same as in feeds.tcl
set feeds_dbfile "/home/niinuska/bot/feeds.sqlite"

# Use a HTTP proxy? 1 = yes, 0 = no
set http_proxy 0

# HTTP proxy address and port
set http_proxy_host "cache.inet.fi"
set http_proxy_port 800

##########################################################################
# Build the HTML-entity -> plain-text replacement table as one long
# "|"-separated string of from|to pairs, then split it into the list
# form [string map] expects.
# NOTE(review): this listing comes from an hgweb page that has already
# decoded the HTML entities, so the literal pair texts below may not
# match the real source file byte-for-byte -- verify against the repo.
set feeds_ent_str "-|-|'|'|—|-|‏||—|-|–|--|‪||‬|"
append feeds_ent_str "|‎||å|å|Å|Å|é|é|:|:| | "
append feeds_ent_str "|”|\"|“|\"|«|<<|»|>>|"|\""
append feeds_ent_str "|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>"
append feeds_ent_str "|ä|ä|å|ö|—|-|'|'|–|-|"|\""
append feeds_ent_str "|||-|’|'|ü|ü|Ü|Ü|•|*|€|€"
append feeds_ent_str "|”|\""
set html_ent [split [encoding convertfrom "utf-8" $feeds_ent_str] "|"]

package require http
::http::config -urlencoding iso8859-1 -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5"
if {$http_proxy != 0} {
	::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
}

# Replace HTML entities in udata with their plain-text equivalents.
# Two passes over the same table: first case-sensitive, then
# case-insensitive, so differently-cased entity spellings map too.
proc convert_ent {udata} {
	global html_ent
	return [string map -nocase $html_ent [string map $html_ent $udata]]
}

# Insert one feed item (uname = feed name, uurl = item link, utitle =
# item title) into the 'feeds' table unless a row with the same
# url+feed already exists. Non-absolute links get uprefix prepended.
# Increments the global new-item counter nitems; exits the whole
# script with status 15 on SQL error.
proc add_entry {uname uprefix uurl utitle} {
	global currclock feeds_db nitems
	set utmp [convert_ent $uurl]
	# Absolute http(s) URLs are used as-is; anything else is treated
	# as relative to uprefix.
	if {[string match "http://*" $utmp] || [string match "https://*" $utmp]} {
		set utest "$utmp"
	} else {
		set utest "$uprefix$utmp"
	}

	# Duplicate check. Values are run through [escape] before being
	# spliced into the SQL text (no parameter binding is used here).
	set usql "SELECT title FROM feeds WHERE url='[escape $utest]' AND feed='[escape $uname]'"
	if {![feeds_db exists $usql]} {
		set usql "INSERT INTO feeds (feed,utime,url,title) VALUES ('[escape $uname]', $currclock, '[escape $utest]', '[escape $utitle]')"
		incr nitems
		if {[catch {feeds_db eval $usql} uerrmsg]} {
			puts "\nError: $uerrmsg on:\n$usql"
			exit 15
		}
	}
}

# Fetch the RSS feed at datauri and add every <item> found to the
# database via add_entry. Three regexp variants are tried in order:
# 1) titles wrapped in CDATA, 2) plain <item>, 3) <item> carrying
# attributes. Returns 1 on fetch error, 0 otherwise.
# Quoting note: inside a double-quoted Tcl string "\*" and "\?" are
# just "*" and "?", so ".\*\?" is the non-greedy regex ".*?"; "\\\]"
# is an escaped "]" and "<..CDATA." matches "<![CDATA[".
proc add_rss_feed {datauri dataname dataprefix} {
	if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
		puts "Error getting $datauri: $uerrmsg"
		return 1
	}
	set upage [::http::data $utoken]
	::http::cleanup $utoken

	# [regexp -all -inline] returns a flat list of (whole match,
	# group 1 = title, group 2 = link) triplets -- hence the
	# stride-3 loops and the +2/+1 argument order below.
	set umatches [regexp -all -nocase -inline -- "<item>.\*\?<title><..CDATA.(.\*\?)\\\]\\\]></title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage]
	set nmatches [llength $umatches]
	for {set n 0} {$n < $nmatches} {incr n 3} {
		add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]]
	}

	if {$nmatches == 0} {
		set umatches [regexp -all -nocase -inline -- "<item>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage]
		set nmatches [llength $umatches]
		for {set n 0} {$n < $nmatches} {incr n 3} {
			add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]]
		}
	}

	if {$nmatches == 0} {
		set umatches [regexp -all -nocase -inline -- "<item \[^>\]*>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage]
		set nmatches [llength $umatches]
		for {set n 0} {$n < $nmatches} {incr n 3} {
			add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]]
		}
	}
	return 0
}

### Open database, etc
set nitems 0
set currclock [clock seconds]

global feeds_db
if {[catch {sqlite3 feeds_db $feeds_dbfile} uerrmsg]} {
	puts "Could not open SQLite3 database '$feeds_dbfile': $uerrmsg."
	exit 2
}

##############################################################################

### Fetch and parse Halla-aho's blog page data
# Plain HTML page (not RSS), so it gets ad hoc scraping: first bolded
# links, then non-bolded ones. Here regexp group 1 is the link and
# group 2 the title, i.e. the opposite add_entry argument order to
# the RSS triplets above.
set datauri "http://www.halla-aho.com/scripta/";
set dataname "Mestari"
if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
	puts "Error getting $datauri: $uerrmsg"
} else {
	set upage [::http::data $utoken]
	::http::cleanup $utoken

	set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\"><b>(\[^<\]+)</b>" $upage]
	set nmatches [llength $umatches]
	for {set n 0} {$n < $nmatches} {incr n 3} {
		add_entry $dataname $datauri [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]]
	}

	set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\">(\[^<\]\[^b\]\[^<\]+)</a>" $upage]
	set nmatches [llength $umatches]
	for {set n 0} {$n < $nmatches} {incr n 3} {
		add_entry $dataname $datauri [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]]
	}
}

### The Adventurers
set datauri "http://www.peldor.com/chapters/index_sidebar.html";
set dataname "The Adventurers"
if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
	puts "Error getting $datauri: $uerrmsg"
} else {
	set upage [::http::data $utoken]
	::http::cleanup $utoken

	set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+)\">(\[^<\]+)</a>" $upage]
	set nmatches [llength $umatches]
	for {set n 0} {$n < $nmatches} {incr n 3} {
		add_entry $dataname "http://www.peldor.com/" [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]]
	}
}

### Order of the Stick
set datauri "http://www.giantitp.com/comics/oots.html";
set dataname "OOTS"
if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
	puts "Error getting $datauri: $uerrmsg"
} else {
	set upage [::http::data $utoken]
	::http::cleanup $utoken

	set umatches [regexp -all -nocase -inline -- "<a href=\"(/comics/oots\[0-9\]+\.html)\">(\[^<\]+)</a>" $upage]
	set nmatches [llength $umatches]
	for {set n 0} {$n < $nmatches} {incr n 3} {
		add_entry $dataname "http://www.giantitp.com" [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]]
	}
}

### Generic RSS-feed fetching
#add_rss_feed "http://www.kaleva.fi/rss/145.xml" "Kaleva/Tiede" ""
add_rss_feed "http://www.effi.org/xml/uutiset.rss" "EFFI" ""
add_rss_feed "http://static.mtv3.fi/rss/uutiset_rikos.rss" "MTV3/Rikos" ""
add_rss_feed "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" ""
#add_rss_feed "http://lehti.samizdat.info/feed/" "Lehti" ""

### Close database
feeds_db close

puts "$nitems new items."