diff hae_feedit.tcl @ 0:1c4e2814cd41

Initial import.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 21 Sep 2010 13:12:49 +0300
parents
children bdb2b1fd6601
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hae_feedit.tcl	Tue Sep 21 13:12:49 2010 +0300
@@ -0,0 +1,191 @@
+#!/usr/bin/tclsh
+############################################################################
+#
+# FeedCheck fetcher v0.7 by ccr/TNSP <ccr@tnsp.org>
+# (C) Copyright 2008-2010 Tecnic Software productions (TNSP) 
+#
+# This script is freely distributable under GNU GPL (version 2) license.
+#
+############################################################################
+
+# Datafile, MUST be set to same as in feeds.tcl
+set datafile "/home/niinuska/bot/data.feeds"
+
+# Use a HTTP proxy? 1 = yes, 0 = no
+set http_proxy 0
+
+# HTTP proxy address and port
+set http_proxy_host "cache.inet.fi"
+set http_proxy_port 800
+
+
+##############################################################################
+# Flat key/value list consumed by [string map] in convert_ent: each odd
+# element is an HTML entity (or raw byte sequence), each following element
+# is its plain-text replacement.
+# NOTE(review): the trailing pairs look mojibake'd in this rendering
+# (likely iso8859-1 bytes shown as UTF-8) — verify against the original file.
+set html_ent [split "&#160;| |&raquo;|>>|&quot;|\"|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>|ä|ä|ö|ö|Ä|Ä" "|"]
+
+package require http
+# Fake a desktop browser user-agent; some sites block unknown clients.
+::http::config -urlencoding iso8859-1 -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5"
+if {$http_proxy != 0} {
+	::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
+}
+
+
+# Translate known HTML entities in the given text to their plain-text
+# equivalents, according to the global html_ent mapping list.
+proc convert_ent {udata} {
+	upvar #0 html_ent entmap
+	set converted [string map $entmap $udata]
+	return $converted
+}
+
+
+# Register one feed item.
+#   uname   - feed display name
+#   uprefix - prefix prepended to uurl to form the full item URL
+#   uurl    - item link, may contain HTML entities
+#   udesc   - item title/description, may contain HTML entities
+# Always marks the full URL as "seen" in isentries; if it is not already
+# present in the entries array, stores it with the current timestamp and
+# increments the newurls counter.
+proc add_entry {uname uprefix uurl udesc} {
+	global entries isentries newurls currclock
+	set utest "$uprefix[convert_ent $uurl]"
+	set isentries($utest) 1
+	# Only genuinely new URLs are added; existing ones keep their
+	# original timestamp and description. (Replaces the old
+	# catch-on-read existence probe with the idiomatic [info exists].)
+	if {![info exists entries($utest)]} {
+		set entries($utest) [list $currclock $uname $utest [convert_ent $udesc]]
+		incr newurls
+	}
+}
+
+
+# Fetch an RSS feed from datauri and register each <item> via add_entry.
+# dataname is the feed's display name, dataprefix is prepended to item URLs.
+# Returns 1 on fetch failure, 0 otherwise.
+# Parsing is done with regexps rather than an XML parser; three patterns
+# are tried in order until one matches:
+#   1. <title> wrapped in CDATA
+#   2. plain <title>
+#   3. <item> tags carrying attributes
+proc add_rss_feed {datauri dataname dataprefix} {
+	if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
+		puts "Error getting $datauri: $uerrmsg"
+		return 1
+	}
+	set upage [::http::data $utoken]
+	::http::cleanup $utoken
+	
+	# In these double-quoted patterns "\*\?" is just the non-greedy "*?".
+	# Each match yields 3 list elements (whole match, title, link),
+	# hence the stride of 3 below: group 1 = title, group 2 = link.
+	set umatches [regexp -all -nocase -inline -- "<item>.\*\?<title><..CDATA.(.\*\?)\\\]\\\]></title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage]
+	set nmatches [llength $umatches]
+	for {set n 0} {$n < $nmatches} {incr n 3} {
+		add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]]
+	}
+	
+	# Fallback: titles without CDATA wrapping.
+	if {$nmatches == 0} {
+	set umatches [regexp -all -nocase -inline -- "<item>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage]
+	set nmatches [llength $umatches]
+	for {set n 0} {$n < $nmatches} {incr n 3} {
+		add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]]
+	}
+	}
+
+	# Fallback: <item> elements that carry attributes.
+	if {$nmatches == 0} {
+	set umatches [regexp -all -nocase -inline -- "<item \[^>\]*>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage]
+	set nmatches [llength $umatches]
+	for {set n 0} {$n < $nmatches} {incr n 3} {
+		add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]]
+	}
+	}
+	return 0
+}
+
+
+##############################################################################
+### Read in the previously stored entries
+set oldurls 0
+set newurls 0
+# Missing datafile is not an error: first run simply starts empty.
+if {![catch {set ufile [open $datafile r 0600]} uerrmsg]} {
+	while {![eof $ufile]} {
+		gets $ufile uline
+		# Records are field-separated; the separator below appears
+		# mojibake'd in this rendering (presumably a single 0xBD "½"
+		# byte in the original iso8859-1 file — verify before editing).
+		set urec [split $uline "½"]
+		# Expected record: {timestamp feedname url description}.
+		if {[llength $urec] == 4} {
+			set entries([lindex $urec 2]) $urec
+			# 0 = known from a previous run, not yet re-seen.
+			set isentries([lindex $urec 2]) 0
+			incr oldurls
+		}
+	}
+	close $ufile
+}
+set currclock [clock seconds]
+
+
+##############################################################################
+### Fetch and parse Halla-aho's articles (plain HTML page, not a feed)
+set datauri "http://www.halla-aho.com/scripta/";
+set dataname "Mestari"
+if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
+	puts "Error getting $datauri: $uerrmsg"
+} else {
+	set upage [::http::data $utoken]
+	::http::cleanup $utoken
+	
+	# Links with bolded titles: group 1 = relative URL, group 2 = title.
+	# Stride of 3 per match (whole match + two groups).
+	set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\"><b>(\[^<\]+)</b>" $upage]
+	set nmatches [llength $umatches]
+	for {set n 0} {$n < $nmatches} {incr n 3} {
+		add_entry $dataname $datauri [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]]
+	}
+
+	# Second pass: non-bolded links (the [^<][^b][^<]+ guard skips
+	# anchors whose content starts with a <b> tag).
+	set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\">(\[^<\]\[^b\]\[^<\]+)</a>" $upage]
+	set nmatches [llength $umatches]
+	for {set n 0} {$n < $nmatches} {incr n 3} {
+		add_entry $dataname $datauri [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]]
+	}
+}
+
+
+### The Adventurers (HTML sidebar index, not a feed)
+set datauri "http://www.peldor.com/chapters/index_sidebar.html";
+set dataname "The Adventurers"
+if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
+	puts "Error getting $datauri: $uerrmsg"
+} else {
+	set upage [::http::data $utoken]
+	::http::cleanup $utoken
+	
+	# Any anchor: group 1 = href, group 2 = link text; stride 3 per match.
+	set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+)\">(\[^<\]+)</a>" $upage]
+	set nmatches [llength $umatches]
+	for {set n 0} {$n < $nmatches} {incr n 3} {
+		add_entry $dataname "http://www.peldor.com/" [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]]
+	}
+}
+
+
+### Order of the Stick (HTML comic index, not a feed)
+set datauri "http://www.giantitp.com/comics/oots.html";
+set dataname "OOTS"
+if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
+	puts "Error getting $datauri: $uerrmsg"
+} else {
+	set upage [::http::data $utoken]
+	::http::cleanup $utoken
+	
+	# Comic-page anchors only: group 1 = absolute path, group 2 = title.
+	set umatches [regexp -all -nocase -inline -- "<a href=\"(/comics/oots\[0-9\]+\.html)\">(\[^<\]+)</a>" $upage]
+	set nmatches [llength $umatches]
+	for {set n 0} {$n < $nmatches} {incr n 3} {
+		add_entry $dataname "http://www.giantitp.com" [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]]
+	}
+}
+
+
+### Fetch the proper RSS feeds (errors are reported inside add_rss_feed
+### and do not abort the run; return values are deliberately ignored)
+add_rss_feed "http://www.kaleva.fi/rss/145.xml" "Kaleva/Tiede" ""
+
+#add_rss_feed "http://sektori.com/?tpl=rssNewsFeed" "Sektori" ""
+
+add_rss_feed "http://www.effi.org/xml/uutiset.rss" "EFFI" ""
+
+add_rss_feed "http://www.mtv3.fi/rss/uutiset_rikos.rss" "MTV3/Rikos" ""
+
+add_rss_feed "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" ""
+
+#add_rss_feed "http://lehti.samizdat.info/feed/" "Lehti" ""
+
+##############################################################################
+### Open the output file (write to a temp file, then atomically rename)
+set tmpfname "$datafile.tmp"
+if {[catch {set outfile [open $tmpfname w 0600]} uerrmsg]} {
+	puts "Error opening $tmpfname for writing: $uerrmsg"
+	# Top-level return: stops evaluation of the rest of the script.
+	return 1
+}
+
+# Entries older than 7 days would expire — but note the filter below is
+# commented out, so currently EVERY entry is written back and uexpire
+# is unused.
+set uexpire [expr [clock seconds] - (7*24*60*60)]
+foreach {ukey udata} [array get entries] {
+#	if {$isentries($ukey) != 0 || [lindex $udata 0] >= $uexpire} {
+		# Join with the same separator used when reading (appears
+		# mojibake'd here; presumably a single "½" byte — verify).
+		puts $outfile [join $udata "½"]
+#	}
+}
+
+close $outfile
+if {[catch {file rename -force -- $tmpfname $datafile} uerrmsg]} {
+	puts "Error renaming $tmpfname to $datafile: $uerrmsg"
+}
+#puts "$newurls new entries."