Mercurial > hg > egg-tcls
diff hae_feedit.tcl @ 0:1c4e2814cd41
Initial import.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 21 Sep 2010 13:12:49 +0300 |
parents | |
children | bdb2b1fd6601 |
line wrap: on
line diff
#!/usr/bin/tclsh
############################################################################
#
# FeedCheck fetcher v0.7 by ccr/TNSP <ccr@tnsp.org>
# (C) Copyright 2008-2010 Tecnic Software productions (TNSP)
#
# This script is freely distributable under GNU GPL (version 2) license.
#
############################################################################

# Datafile, MUST be set to same as in feeds.tcl
set datafile "/home/niinuska/bot/data.feeds"

# Use a HTTP proxy? 1 = yes, 0 = no
set http_proxy 0

# HTTP proxy address and port
set http_proxy_host "cache.inet.fi"
set http_proxy_port 800


##############################################################################
# Entity -> literal character map consumed by [string map] in convert_ent.
# NOTE(review): the original list was mangled by HTML rendering (the entity
# names were decoded in place, leaving pairs like "ä|ä"); the names below
# are reconstructed from the surviving replacement characters — verify
# against the matching table in feeds.tcl.
set html_ent [split "&nbsp;| |&raquo;|>>|&quot;|\"|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>|&#228;|ä|&#246;|ö|&#196;|Ä" "|"]

package require http
::http::config -urlencoding iso8859-1 -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5"
if {$http_proxy != 0} {
	::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
}


# Replace known HTML entities in udata with their literal characters.
proc convert_ent {udata} {
	global html_ent
	return [string map $html_ent $udata]
}


# Record one feed item, keyed by its prefixed URL.
# Marks the key as "seen this run" (isentries = 1) and, when the URL was
# not already present in the old data, stores a record of the form
# [fetch-timestamp, feed name, url, description] and bumps newurls.
proc add_entry {uname uprefix uurl udesc} {
	global entries isentries newurls currclock
	set utest "$uprefix[convert_ent $uurl]"
	set isentries($utest) 1
	# BUGFIX: existence check via info exists instead of a catch around a
	# read into a dead temporary variable.
	if {![info exists entries($utest)]} {
		set entries($utest) [list $currclock $uname $utest [convert_ent $udesc]]
		incr newurls
	}
}


# Scan a fetched page with one <item> regexp and add every match as an
# entry.  upattern must capture (title, link) in that order; the result of
# regexp -all -inline therefore holds 3 elements per item (full match +
# 2 groups), hence the stride of 3.  Returns the raw match-list length so
# the caller can tell whether anything matched.
proc scan_rss_items {upage upattern uname uprefix} {
	set umatches [regexp -all -nocase -inline -- $upattern $upage]
	set nmatches [llength $umatches]
	for {set n 0} {$n < $nmatches} {incr n 3} {
		add_entry $uname $uprefix [lindex $umatches [expr {$n + 2}]] [lindex $umatches [expr {$n + 1}]]
	}
	return $nmatches
}


# Fetch an RSS feed from datauri and add every <item> found, trying
# progressively laxer patterns (CDATA title, plain title, <item attr>).
# Returns 0 on success, 1 when the HTTP fetch failed.
proc add_rss_feed {datauri dataname dataprefix} {
	if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
		puts "Error getting $datauri: $uerrmsg"
		return 1
	}
	set upage [::http::data $utoken]
	::http::cleanup $utoken

	set nmatches [scan_rss_items $upage "<item>.\*\?<title><..CDATA.(.\*\?)\\\]\\\]></title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $dataname $dataprefix]

	if {$nmatches == 0} {
		set nmatches [scan_rss_items $upage "<item>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $dataname $dataprefix]
	}

	if {$nmatches == 0} {
		set nmatches [scan_rss_items $upage "<item \[^>\]*>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $dataname $dataprefix]
	}
	return 0
}


##############################################################################
### Read in the old entries ("Luetaan vanhat paskat")
set oldurls 0
set newurls 0
if {![catch {set ufile [open $datafile r]} uerrmsg]} {
	# BUGFIX: gets-based loop; the old eof-guarded loop executed one extra
	# iteration on the stale buffer after end of file.
	while {[gets $ufile uline] >= 0} {
		# Records are "½"-separated: timestamp ½ feed name ½ url ½ description
		set urec [split $uline "½"]
		if {[llength $urec] == 4} {
			set entries([lindex $urec 2]) $urec
			set isentries([lindex $urec 2]) 0
			incr oldurls
		}
	}
	close $ufile
}
set currclock [clock seconds]


##############################################################################
### Fetch and parse Halla-aho's articles ("Haetaan ja parsitaan Halla-ahon jutut")
set datauri "http://www.halla-aho.com/scripta/"
set dataname "Mestari"
if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
	puts "Error getting $datauri: $uerrmsg"
} else {
	set upage [::http::data $utoken]
	::http::cleanup $utoken

	# Bolded article links: capture groups are (href, title).
	set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\"><b>(\[^<\]+)</b>" $upage]
	set nmatches [llength $umatches]
	for {set n 0} {$n < $nmatches} {incr n 3} {
		add_entry $dataname $datauri [lindex $umatches [expr {$n + 1}]] [lindex $umatches [expr {$n + 2}]]
	}

	# Non-bolded article links (the [^<][^b][^<]+ class skips <b> titles
	# already handled above).
	set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\">(\[^<\]\[^b\]\[^<\]+)</a>" $upage]
	set nmatches [llength $umatches]
	for {set n 0} {$n < $nmatches} {incr n 3} {
		add_entry $dataname $datauri [lindex $umatches [expr {$n + 1}]] [lindex $umatches [expr {$n + 2}]]
	}
}


### The Adventurers
set datauri "http://www.peldor.com/chapters/index_sidebar.html"
set dataname "The Adventurers"
if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
	puts "Error getting $datauri: $uerrmsg"
} else {
	set upage [::http::data $utoken]
	::http::cleanup $utoken

	set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+)\">(\[^<\]+)</a>" $upage]
	set nmatches [llength $umatches]
	for {set n 0} {$n < $nmatches} {incr n 3} {
		add_entry $dataname "http://www.peldor.com/" [lindex $umatches [expr {$n + 1}]] [lindex $umatches [expr {$n + 2}]]
	}
}


### Order of the Stick
set datauri "http://www.giantitp.com/comics/oots.html"
set dataname "OOTS"
if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
	puts "Error getting $datauri: $uerrmsg"
} else {
	set upage [::http::data $utoken]
	::http::cleanup $utoken

	set umatches [regexp -all -nocase -inline -- "<a href=\"(/comics/oots\[0-9\]+\.html)\">(\[^<\]+)</a>" $upage]
	set nmatches [llength $umatches]
	for {set n 0} {$n < $nmatches} {incr n 3} {
		add_entry $dataname "http://www.giantitp.com" [lindex $umatches [expr {$n + 1}]] [lindex $umatches [expr {$n + 2}]]
	}
}


### Fetch the RSS feeds ("Hae RSS-feedit")
add_rss_feed "http://www.kaleva.fi/rss/145.xml" "Kaleva/Tiede" ""

#add_rss_feed "http://sektori.com/?tpl=rssNewsFeed" "Sektori" ""

add_rss_feed "http://www.effi.org/xml/uutiset.rss" "EFFI" ""

add_rss_feed "http://www.mtv3.fi/rss/uutiset_rikos.rss" "MTV3/Rikos" ""

add_rss_feed "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" ""

#add_rss_feed "http://lehti.samizdat.info/feed/" "Lehti" ""


##############################################################################
### Open the output file ("Avataan tulostiedosto")
set tmpfname "$datafile.tmp"
if {[catch {set outfile [open $tmpfname w 0600]} uerrmsg]} {
	puts "Error opening $tmpfname for writing: $uerrmsg"
	# BUGFIX: exit with a failing status; the original top-level "return 1"
	# ended the script with exit status 0.
	exit 1
}

# Expiry cutoff (one week); the pruning condition is currently disabled,
# so every entry is written back unconditionally.
set uexpire [expr {[clock seconds] - (7*24*60*60)}]
foreach {ukey udata} [array get entries] {
#	if {$isentries($ukey) != 0 || [lindex $udata 0] >= $uexpire} {
	puts $outfile [join $udata "½"]
#	}
}

close $outfile
# Atomically replace the datafile with the freshly written temp file.
if {[catch {file rename -force -- $tmpfname $datafile} uerrmsg]} {
	puts "Error renaming $tmpfname to $datafile: $uerrmsg"
}
#puts "$newurls new entries."