Mercurial > hg > egg-tcls
changeset 159:bbc7860c22a6
Renamed get_feeds.tcl to fetch_feeds.tcl.
| author | Matti Hamalainen <ccr@tnsp.org> |
|---|---|
| date | Mon, 02 Jun 2014 12:29:47 +0300 |
| parents | db2b1f74d994 |
| children | e3e156911ab4 |
| files | fetch_feeds.tcl get_feeds.tcl |
| diffstat | 2 files changed, 189 insertions(+), 189 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fetch_feeds.tcl Mon Jun 02 12:29:47 2014 +0300 @@ -0,0 +1,189 @@ +#!/usr/bin/tclsh +# +# NOTICE! Change above path to correct tclsh binary path! +# +########################################################################## +# +# FeedCheck fetcher v0.8 by Matti 'ccr' Hamalainen <ccr@tnsp.org> +# (C) Copyright 2008-2013 Tecnic Software productions (TNSP) +# +# This script is freely distributable under GNU GPL (version 2) license. +# +########################################################################## +package require sqlite3 +source [file dirname [info script]]/util_convert.tcl + +# SQLite3 database, MUST be set to same as in feeds.tcl +set feeds_dbfile "/home/niinuska/bot/feeds.sqlite" + +# Use a HTTP proxy? 1 = yes, 0 = no +set http_proxy 0 + +# HTTP proxy address and port +set http_proxy_host "cache.inet.fi" +set http_proxy_port 800 + + +########################################################################## + +set feeds_ent_str "-|-|'|'|—|-|‏||—|-|–|--|‪||‬|" +append feeds_ent_str "|‎||å|å|Å|Å|é|é|:|:| | " +append feeds_ent_str "|”|\"|“|\"|«|<<|»|>>|"|\"" +append feeds_ent_str "|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>" +append feeds_ent_str "|ä|ä|å|ö|—|-|'|'|–|-|"|\"" +append feeds_ent_str "|||-|’|'|ü|ü|Ü|Ü|•|*|€|€" +append feeds_ent_str "|”|\"" +set html_ent [split [encoding convertfrom "utf-8" $feeds_ent_str] "|"] + +package require http +::http::config -urlencoding iso8859-1 -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5" +if {$http_proxy != 0} { + ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port +} + + +proc convert_ent {udata} { + global html_ent + return [string map -nocase $html_ent [string map $html_ent $udata]] +} + +proc add_entry {uname uprefix uurl utitle} { + global currclock feeds_db nitems + set utmp [convert_ent $uurl] + if {[string match "http://*" $utmp] || [string match "https://*" $utmp]} { + set utest "$utmp" + } else { + set utest 
"$uprefix$utmp" + } + + set usql "SELECT title FROM feeds WHERE url='[escape $utest]' AND feed='[escape $uname]'" + if {![feeds_db exists $usql]} { + set usql "INSERT INTO feeds (feed,utime,url,title) VALUES ('[escape $uname]', $currclock, '[escape $utest]', '[escape $utitle]')" + incr nitems + if {[catch {feeds_db eval $usql} uerrmsg]} { + puts "\nError: $uerrmsg on:\n$usql" + exit 15 + } + } +} + + +proc add_rss_feed {datauri dataname dataprefix} { + if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { + puts "Error getting $datauri: $uerrmsg" + return 1 + } + set upage [::http::data $utoken] + ::http::cleanup $utoken + + set umatches [regexp -all -nocase -inline -- "<item>.\*\?<title><..CDATA.(.\*\?)\\\]\\\]></title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage] + set nmatches [llength $umatches] + for {set n 0} {$n < $nmatches} {incr n 3} { + add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]] + } + + if {$nmatches == 0} { + set umatches [regexp -all -nocase -inline -- "<item>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage] + set nmatches [llength $umatches] + for {set n 0} {$n < $nmatches} {incr n 3} { + add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]] + } + } + + if {$nmatches == 0} { + set umatches [regexp -all -nocase -inline -- "<item \[^>\]*>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage] + set nmatches [llength $umatches] + for {set n 0} {$n < $nmatches} {incr n 3} { + add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]] + } + } + + return 0 +} + + +### Open database, etc +set nitems 0 +set currclock [clock seconds] +global feeds_db +if {[catch {sqlite3 feeds_db $feeds_dbfile} uerrmsg]} { + puts "Could not open SQLite3 database '$feeds_dbfile': $uerrmsg." 
+ exit 2 +} + + +############################################################################## +### Fetch and parse Halla-aho's blog page data +set datauri "http://www.halla-aho.com/scripta/"; +set dataname "Mestari" +if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { + puts "Error getting $datauri: $uerrmsg" +} else { + set upage [::http::data $utoken] + ::http::cleanup $utoken + + set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\"><b>(\[^<\]+)</b>" $upage] + set nmatches [llength $umatches] + for {set n 0} {$n < $nmatches} {incr n 3} { + add_entry $dataname $datauri [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] + } + + set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\">(\[^<\]\[^b\]\[^<\]+)</a>" $upage] + set nmatches [llength $umatches] + for {set n 0} {$n < $nmatches} {incr n 3} { + add_entry $dataname $datauri [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] + } +} + + +### The Adventurers +set datauri "http://www.peldor.com/chapters/index_sidebar.html"; +set dataname "The Adventurers" +if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { + puts "Error getting $datauri: $uerrmsg" +} else { + set upage [::http::data $utoken] + ::http::cleanup $utoken + + set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+)\">(\[^<\]+)</a>" $upage] + set nmatches [llength $umatches] + for {set n 0} {$n < $nmatches} {incr n 3} { + add_entry $dataname "http://www.peldor.com/" [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] + } +} + + +### Order of the Stick +set datauri "http://www.giantitp.com/comics/oots.html"; +set dataname "OOTS" +if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { + puts "Error getting $datauri: $uerrmsg" +} else { + set upage [::http::data $utoken] + ::http::cleanup $utoken + + set umatches [regexp -all -nocase -inline -- "<a 
href=\"(/comics/oots\[0-9\]+\.html)\">(\[^<\]+)</a>" $upage] + set nmatches [llength $umatches] + for {set n 0} {$n < $nmatches} {incr n 3} { + add_entry $dataname "http://www.giantitp.com" [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] + } +} + + +### Generic RSS-feed fetching +#add_rss_feed "http://www.kaleva.fi/rss/145.xml" "Kaleva/Tiede" "" + +add_rss_feed "http://www.effi.org/xml/uutiset.rss" "EFFI" "" + +add_rss_feed "http://static.mtv3.fi/rss/uutiset_rikos.rss" "MTV3/Rikos" "" + +add_rss_feed "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" "" + +#add_rss_feed "http://lehti.samizdat.info/feed/" "Lehti" "" + + + +### Close database +feeds_db close + +puts "$nitems new items."
--- a/get_feeds.tcl Mon Jun 02 12:29:11 2014 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,189 +0,0 @@ -#!/usr/bin/tclsh -# -# NOTICE! Change above path to correct tclsh binary path! -# -########################################################################## -# -# FeedCheck fetcher v0.8 by Matti 'ccr' Hamalainen <ccr@tnsp.org> -# (C) Copyright 2008-2013 Tecnic Software productions (TNSP) -# -# This script is freely distributable under GNU GPL (version 2) license. -# -########################################################################## -package require sqlite3 -source [file dirname [info script]]/util_convert.tcl - -# SQLite3 database, MUST be set to same as in feeds.tcl -set feeds_dbfile "/home/niinuska/bot/feeds.sqlite" - -# Use a HTTP proxy? 1 = yes, 0 = no -set http_proxy 0 - -# HTTP proxy address and port -set http_proxy_host "cache.inet.fi" -set http_proxy_port 800 - - -########################################################################## - -set feeds_ent_str "-|-|'|'|—|-|‏||—|-|–|--|‪||‬|" -append feeds_ent_str "|‎||å|å|Å|Å|é|é|:|:| | " -append feeds_ent_str "|”|\"|“|\"|«|<<|»|>>|"|\"" -append feeds_ent_str "|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>" -append feeds_ent_str "|ä|ä|å|ö|—|-|'|'|–|-|"|\"" -append feeds_ent_str "|||-|’|'|ü|ü|Ü|Ü|•|*|€|€" -append feeds_ent_str "|”|\"" -set html_ent [split [encoding convertfrom "utf-8" $feeds_ent_str] "|"] - -package require http -::http::config -urlencoding iso8859-1 -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5" -if {$http_proxy != 0} { - ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port -} - - -proc convert_ent {udata} { - global html_ent - return [string map -nocase $html_ent [string map $html_ent $udata]] -} - -proc add_entry {uname uprefix uurl utitle} { - global currclock feeds_db nitems - set utmp [convert_ent $uurl] - if {[string match "http://*" $utmp] || [string match "https://*" $utmp]} { - set utest "$utmp" - } else { - set utest 
"$uprefix$utmp" - } - - set usql "SELECT title FROM feeds WHERE url='[escape $utest]' AND feed='[escape $uname]'" - if {![feeds_db exists $usql]} { - set usql "INSERT INTO feeds (feed,utime,url,title) VALUES ('[escape $uname]', $currclock, '[escape $utest]', '[escape $utitle]')" - incr nitems - if {[catch {feeds_db eval $usql} uerrmsg]} { - puts "\nError: $uerrmsg on:\n$usql" - exit 15 - } - } -} - - -proc add_rss_feed {datauri dataname dataprefix} { - if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { - puts "Error getting $datauri: $uerrmsg" - return 1 - } - set upage [::http::data $utoken] - ::http::cleanup $utoken - - set umatches [regexp -all -nocase -inline -- "<item>.\*\?<title><..CDATA.(.\*\?)\\\]\\\]></title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage] - set nmatches [llength $umatches] - for {set n 0} {$n < $nmatches} {incr n 3} { - add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]] - } - - if {$nmatches == 0} { - set umatches [regexp -all -nocase -inline -- "<item>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage] - set nmatches [llength $umatches] - for {set n 0} {$n < $nmatches} {incr n 3} { - add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]] - } - } - - if {$nmatches == 0} { - set umatches [regexp -all -nocase -inline -- "<item \[^>\]*>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage] - set nmatches [llength $umatches] - for {set n 0} {$n < $nmatches} {incr n 3} { - add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]] - } - } - - return 0 -} - - -### Open database, etc -set nitems 0 -set currclock [clock seconds] -global feeds_db -if {[catch {sqlite3 feeds_db $feeds_dbfile} uerrmsg]} { - puts "Could not open SQLite3 database '$feeds_dbfile': $uerrmsg." 
- exit 2 -} - - -############################################################################## -### Fetch and parse Halla-aho's blog page data -set datauri "http://www.halla-aho.com/scripta/"; -set dataname "Mestari" -if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { - puts "Error getting $datauri: $uerrmsg" -} else { - set upage [::http::data $utoken] - ::http::cleanup $utoken - - set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\"><b>(\[^<\]+)</b>" $upage] - set nmatches [llength $umatches] - for {set n 0} {$n < $nmatches} {incr n 3} { - add_entry $dataname $datauri [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] - } - - set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\">(\[^<\]\[^b\]\[^<\]+)</a>" $upage] - set nmatches [llength $umatches] - for {set n 0} {$n < $nmatches} {incr n 3} { - add_entry $dataname $datauri [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] - } -} - - -### The Adventurers -set datauri "http://www.peldor.com/chapters/index_sidebar.html"; -set dataname "The Adventurers" -if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { - puts "Error getting $datauri: $uerrmsg" -} else { - set upage [::http::data $utoken] - ::http::cleanup $utoken - - set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+)\">(\[^<\]+)</a>" $upage] - set nmatches [llength $umatches] - for {set n 0} {$n < $nmatches} {incr n 3} { - add_entry $dataname "http://www.peldor.com/" [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] - } -} - - -### Order of the Stick -set datauri "http://www.giantitp.com/comics/oots.html"; -set dataname "OOTS" -if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { - puts "Error getting $datauri: $uerrmsg" -} else { - set upage [::http::data $utoken] - ::http::cleanup $utoken - - set umatches [regexp -all -nocase -inline -- "<a 
href=\"(/comics/oots\[0-9\]+\.html)\">(\[^<\]+)</a>" $upage] - set nmatches [llength $umatches] - for {set n 0} {$n < $nmatches} {incr n 3} { - add_entry $dataname "http://www.giantitp.com" [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] - } -} - - -### Generic RSS-feed fetching -#add_rss_feed "http://www.kaleva.fi/rss/145.xml" "Kaleva/Tiede" "" - -add_rss_feed "http://www.effi.org/xml/uutiset.rss" "EFFI" "" - -add_rss_feed "http://static.mtv3.fi/rss/uutiset_rikos.rss" "MTV3/Rikos" "" - -add_rss_feed "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" "" - -#add_rss_feed "http://lehti.samizdat.info/feed/" "Lehti" "" - - - -### Close database -feeds_db close - -puts "$nitems new items."