#!/usr/bin/tclsh
#
# NOTICE! Change above path to correct tclsh binary path!
#
############################################################################
#
# FeedCheck fetcher v0.7 by ccr/TNSP <ccr@tnsp.org>
# (C) Copyright 2008-2010 Tecnic Software productions (TNSP)
#
# This script is freely distributable under GNU GPL (version 2) license.
#
############################################################################

# Datafile, MUST be set to same as in feeds.tcl
set datafile "/home/niinuska/bot/data.feeds"

# Use a HTTP proxy? 1 = yes, 0 = no
set http_proxy 0

# HTTP proxy address and port
set http_proxy_host "cache.inet.fi"
set http_proxy_port 800


##############################################################################
# Entity -> replacement pairs consumed by [string map] in convert_ent.
# NOTE(review): this list arrived entity-decoded (both halves of each pair
# showed the same character); the entity names below are reconstructed from
# the decoded pairs -- verify against the original distribution.
set html_ent [split "&nbsp;| |&raquo;|>>|&quot;|\\\"|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>|&#228;|ä|&#246;|ö|&#196;|Ä" "|"]

package require http
::http::config -urlencoding iso8859-1 -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5"
if {$http_proxy != 0} {
    ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
}
# Decode HTML entities in a string using the global html_ent mapping table.
proc convert_ent {udata} {
    global html_ent
    set decoded [string map $html_ent $udata]
    return $decoded
}
|
|
39
|
|
40
|
|
# Register one fetched item. The key is the prefix plus the entity-decoded
# URL; the URL is always flagged as "seen" in isentries, and a record
# (timestamp, feed name, key, decoded description) is stored in entries --
# and newurls incremented -- only when the key has not been seen before.
proc add_entry {uname uprefix uurl udesc} {
    global entries isentries newurls currclock
    set utest "$uprefix[convert_ent $uurl]"
    set isentries($utest) 1
    # Idiomatic existence test instead of catching a read of a missing
    # array element.
    if {![info exists entries($utest)]} {
        set entries($utest) [list $currclock $uname $utest [convert_ent $udesc]]
        incr newurls
    }
}
|
|
50
|
|
51
|
|
# Fetch an RSS feed from datauri and register each <item>'s title/link pair
# via add_entry, prefixing links with dataprefix and tagging them with
# dataname. Returns 0 on success, 1 if the HTTP request failed.
proc add_rss_feed {datauri dataname dataprefix} {
    if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
        puts "Error getting $datauri: $uerrmsg"
        return 1
    }
    set upage [::http::data $utoken]
    ::http::cleanup $utoken

    # Progressively looser item patterns, tried in order until one matches:
    # 1. CDATA-wrapped titles, 2. plain titles, 3. <item> tags carrying
    # attributes. Each pattern captures (title, link).
    set upatterns [list \
        {<item>.*?<title><..CDATA.(.*?)\]\]></title>.*?<link>(http.*?)</link>.*?</item>} \
        {<item>.*?<title>(.*?)</title>.*?<link>(http.*?)</link>.*?</item>} \
        {<item [^>]*>.*?<title>(.*?)</title>.*?<link>(http.*?)</link>.*?</item>}]

    foreach upattern $upatterns {
        set umatches [regexp -all -nocase -inline -- $upattern $upage]
        if {[llength $umatches] > 0} {
            # -inline yields triples: full match, title, link.
            foreach {umatch utitle ulink} $umatches {
                add_entry $dataname $dataprefix $ulink $utitle
            }
            break
        }
    }
    return 0
}
|
|
83
|
|
84
|
|
##############################################################################
### Read in the previously stored entries
set oldurls 0
set newurls 0
if {![catch {set ufile [open $datafile r 0600]} uerrmsg]} {
    # Each record is a single line of four "½"-separated fields:
    # timestamp, feed name, key (prefix+URL), description.
    # gets returns -1 at EOF, so no phantom empty last line is processed.
    while {[gets $ufile uline] >= 0} {
        set urec [split $uline "½"]
        if {[llength $urec] == 4} {
            set entries([lindex $urec 2]) $urec
            # 0 = loaded from disk, not (yet) seen in this run.
            set isentries([lindex $urec 2]) 0
            incr oldurls
        }
    }
    close $ufile
}
set currclock [clock seconds]
|
|
102
|
|
103
|
|
##############################################################################
### Fetch and parse Halla-aho's articles
set datauri "http://www.halla-aho.com/scripta/";
set dataname "Mestari"
if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
    puts "Error getting $datauri: $uerrmsg"
} else {
    set upage [::http::data $utoken]
    ::http::cleanup $utoken

    # Bolded article links: capture groups are (href, title).
    foreach {umatch uurl utitle} [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\"><b>(\[^<\]+)</b>" $upage] {
        add_entry $dataname $datauri $uurl $utitle
    }

    # Non-bolded article links.
    foreach {umatch uurl utitle} [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\">(\[^<\]\[^b\]\[^<\]+)</a>" $upage] {
        add_entry $dataname $datauri $uurl $utitle
    }
}
|
|
126
|
|
127
|
|
### The Adventurers
set datauri "http://www.peldor.com/chapters/index_sidebar.html";
set dataname "The Adventurers"
if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
    puts "Error getting $datauri: $uerrmsg"
} else {
    set upage [::http::data $utoken]
    ::http::cleanup $utoken

    # Chapter links: capture groups are (href, link text).
    foreach {umatch uurl utitle} [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+)\">(\[^<\]+)</a>" $upage] {
        add_entry $dataname "http://www.peldor.com/" $uurl $utitle
    }
}
|
|
143
|
|
144
|
|
### Order of the Stick
set datauri "http://www.giantitp.com/comics/oots.html";
set dataname "OOTS"
if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
    puts "Error getting $datauri: $uerrmsg"
} else {
    set upage [::http::data $utoken]
    ::http::cleanup $utoken

    # Comic page links: capture groups are (relative href, link text).
    foreach {umatch uurl utitle} [regexp -all -nocase -inline -- "<a href=\"(/comics/oots\[0-9\]+\.html)\">(\[^<\]+)</a>" $upage] {
        add_entry $dataname "http://www.giantitp.com" $uurl $utitle
    }
}
|
|
160
|
|
161
|
|
### Fetch the RSS feeds: (uri, display name, link prefix) triples.
# Disabled feeds:
#   "http://sektori.com/?tpl=rssNewsFeed" "Sektori" ""
#   "http://lehti.samizdat.info/feed/" "Lehti" ""
foreach {uuri uname uprefix} [list \
    "http://www.kaleva.fi/rss/145.xml" "Kaleva/Tiede" "" \
    "http://www.effi.org/xml/uutiset.rss" "EFFI" "" \
    "http://www.mtv3.fi/rss/uutiset_rikos.rss" "MTV3/Rikos" "" \
    "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" ""] {
    add_rss_feed $uuri $uname $uprefix
}
|
|
174
|
|
##############################################################################
### Write all entries to a temporary file, then rename it over the datafile
### so the update is atomic from the readers' point of view.
set tmpfname "$datafile.tmp"
if {[catch {set outfile [open $tmpfname w 0600]} uerrmsg]} {
    puts "Error opening $tmpfname for writing: $uerrmsg"
    return 1
}

# One-week expiry threshold; the filter below is deliberately disabled,
# so currently every entry is written back out.
set uexpire [expr {[clock seconds] - (7*24*60*60)}]
foreach {ukey udata} [array get entries] {
#    if {$isentries($ukey) != 0 || [lindex $udata 0] >= $uexpire} {
        puts $outfile [join $udata "½"]
#    }
}

close $outfile
if {[catch {file rename -force -- $tmpfname $datafile} uerrmsg]} {
    puts "Error renaming $tmpfname to $datafile: $uerrmsg"
}
#puts "$newurls new entries."
|