Mercurial > hg > egg-tcls
annotate hae_feedit.tcl @ 135:50bf17f1ba39
Use MFCalc.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 04 Jun 2013 13:23:14 +0300 |
parents | 593874678e45 |
children | 3305e142eecc |
rev | line source |
---|---|
0 | 1 #!/usr/bin/tclsh |
1 | 2 # |
3 # NOTICE! Change above path to correct tclsh binary path! | |
4 # | |
69
df3230f8aa46
Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents:
63
diff
changeset
|
5 ########################################################################## |
0 | 6 # |
114
593874678e45
Clarify authorship by doing sed "s/ccr\/TNSP/Matti 'ccr' Hamalainen/g".
Matti Hamalainen <ccr@tnsp.org>
parents:
69
diff
changeset
|
7 # FeedCheck fetcher v0.7 by Matti 'ccr' Hamalainen <ccr@tnsp.org> |
69
df3230f8aa46
Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents:
63
diff
changeset
|
8 # (C) Copyright 2008-2011 Tecnic Software productions (TNSP) |
0 | 9 # |
10 # This script is freely distributable under GNU GPL (version 2) license. | |
11 # | |
69
df3230f8aa46
Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents:
63
diff
changeset
|
12 ########################################################################## |
0 | 13 |
14 # Datafile, MUST be set to same as in feeds.tcl | |
15 set datafile "/home/niinuska/bot/data.feeds" | |
16 | |
17 # Use a HTTP proxy? 1 = yes, 0 = no | |
18 set http_proxy 0 | |
19 | |
20 # HTTP proxy address and port | |
21 set http_proxy_host "cache.inet.fi" | |
22 set http_proxy_port 800 | |
23 | |
24 | |
69
df3230f8aa46
Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents:
63
diff
changeset
|
25 ########################################################################## |
0 | 26 set html_ent [split " | |»|>>|"|\"|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>|ä|ä|ö|ö|Ä|Ä" "|"] |
27 | |
28 package require http | |
29 ::http::config -urlencoding iso8859-1 -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5" | |
30 if {$http_proxy != 0} { | |
63 | 31 ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port |
0 | 32 } |
33 | |
34 | |
35 proc convert_ent {udata} { | |
63 | 36 global html_ent |
37 return [string map $html_ent $udata] | |
0 | 38 } |
39 | |
40 | |
41 proc add_entry {uname uprefix uurl udesc} { | |
63 | 42 global entries isentries newurls currclock |
43 set utest "$uprefix[convert_ent $uurl]" | |
44 set isentries($utest) 1 | |
45 if {[catch {set utmp $entries($utest)}]} { | |
46 set entries($utest) [list $currclock $uname $utest [convert_ent $udesc]] | |
47 incr newurls | |
48 } | |
0 | 49 } |
50 | |
51 | |
52 proc add_rss_feed {datauri dataname dataprefix} { | |
63 | 53 if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { |
54 puts "Error getting $datauri: $uerrmsg" | |
55 return 1 | |
56 } | |
57 set upage [::http::data $utoken] | |
58 ::http::cleanup $utoken | |
59 | |
60 set umatches [regexp -all -nocase -inline -- "<item>.\*\?<title><..CDATA.(.\*\?)\\\]\\\]></title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage] | |
61 set nmatches [llength $umatches] | |
62 for {set n 0} {$n < $nmatches} {incr n 3} { | |
63 add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]] | |
64 } | |
65 | |
66 if {$nmatches == 0} { | |
67 set umatches [regexp -all -nocase -inline -- "<item>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage] | |
68 set nmatches [llength $umatches] | |
69 for {set n 0} {$n < $nmatches} {incr n 3} { | |
70 add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]] | |
71 } | |
72 } | |
0 | 73 |
63 | 74 if {$nmatches == 0} { |
75 set umatches [regexp -all -nocase -inline -- "<item \[^>\]*>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" $upage] | |
76 set nmatches [llength $umatches] | |
77 for {set n 0} {$n < $nmatches} {incr n 3} { | |
78 add_entry $dataname $dataprefix [lindex $umatches [expr $n+2]] [lindex $umatches [expr $n+1]] | |
79 } | |
80 } | |
81 return 0 | |
0 | 82 } |
83 | |
84 | |
69
df3230f8aa46
Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents:
63
diff
changeset
|
85 ########################################################################## |
df3230f8aa46
Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents:
63
diff
changeset
|
86 ### Read in old data |
0 | 87 set oldurls 0 |
88 set newurls 0 | |
89 if {![catch {set ufile [open $datafile r 0600]} uerrmsg]} { | |
63 | 90 while {![eof $ufile]} { |
91 gets $ufile uline | |
92 set urec [split $uline "½"] | |
93 if {[llength $urec] == 4} { | |
94 set entries([lindex $urec 2]) $urec | |
95 set isentries([lindex $urec 2]) 0 | |
96 incr oldurls | |
97 } | |
98 } | |
99 close $ufile | |
0 | 100 } |
101 set currclock [clock seconds] | |
102 | |
103 | |
104 ############################################################################## | |
69
df3230f8aa46
Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents:
63
diff
changeset
|
105 ### Fetch and parse Halla-aho's blog page data |
0 | 106 set datauri "http://www.halla-aho.com/scripta/"; |
107 set dataname "Mestari" | |
108 if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { | |
63 | 109 puts "Error getting $datauri: $uerrmsg" |
0 | 110 } else { |
63 | 111 set upage [::http::data $utoken] |
112 ::http::cleanup $utoken | |
113 | |
114 set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\"><b>(\[^<\]+)</b>" $upage] | |
115 set nmatches [llength $umatches] | |
116 for {set n 0} {$n < $nmatches} {incr n 3} { | |
117 add_entry $dataname $datauri [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] | |
118 } | |
0 | 119 |
63 | 120 set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\.html)\">(\[^<\]\[^b\]\[^<\]+)</a>" $upage] |
121 set nmatches [llength $umatches] | |
122 for {set n 0} {$n < $nmatches} {incr n 3} { | |
123 add_entry $dataname $datauri [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] | |
124 } | |
0 | 125 } |
126 | |
127 | |
128 ### The Adventurers | |
129 set datauri "http://www.peldor.com/chapters/index_sidebar.html"; | |
130 set dataname "The Adventurers" | |
131 if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { | |
63 | 132 puts "Error getting $datauri: $uerrmsg" |
0 | 133 } else { |
63 | 134 set upage [::http::data $utoken] |
135 ::http::cleanup $utoken | |
136 | |
137 set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+)\">(\[^<\]+)</a>" $upage] | |
138 set nmatches [llength $umatches] | |
139 for {set n 0} {$n < $nmatches} {incr n 3} { | |
140 add_entry $dataname "http://www.peldor.com/" [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] | |
141 } | |
0 | 142 } |
143 | |
144 | |
145 ### Order of the Stick | |
146 set datauri "http://www.giantitp.com/comics/oots.html"; | |
147 set dataname "OOTS" | |
148 if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} { | |
63 | 149 puts "Error getting $datauri: $uerrmsg" |
0 | 150 } else { |
63 | 151 set upage [::http::data $utoken] |
152 ::http::cleanup $utoken | |
153 | |
154 set umatches [regexp -all -nocase -inline -- "<a href=\"(/comics/oots\[0-9\]+\.html)\">(\[^<\]+)</a>" $upage] | |
155 set nmatches [llength $umatches] | |
156 for {set n 0} {$n < $nmatches} {incr n 3} { | |
157 add_entry $dataname "http://www.giantitp.com" [lindex $umatches [expr $n+1]] [lindex $umatches [expr $n+2]] | |
158 } | |
0 | 159 } |
160 | |
161 | |
69
df3230f8aa46
Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents:
63
diff
changeset
|
162 ### Generic RSS-feed fetching |
0 | 163 add_rss_feed "http://www.kaleva.fi/rss/145.xml" "Kaleva/Tiede" "" |
164 | |
165 #add_rss_feed "http://sektori.com/?tpl=rssNewsFeed" "Sektori" "" | |
166 | |
167 add_rss_feed "http://www.effi.org/xml/uutiset.rss" "EFFI" "" | |
168 | |
169 add_rss_feed "http://www.mtv3.fi/rss/uutiset_rikos.rss" "MTV3/Rikos" "" | |
170 | |
171 add_rss_feed "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" "" | |
172 | |
173 #add_rss_feed "http://lehti.samizdat.info/feed/" "Lehti" "" | |
174 | |
175 ############################################################################## | |
69
df3230f8aa46
Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents:
63
diff
changeset
|
176 ### Open data file for writing |
0 | 177 set tmpfname "$datafile.tmp" |
178 if {[catch {set outfile [open $tmpfname w 0600]} uerrmsg]} { | |
63 | 179 puts "Error opening $tmpfname for writing: $uerrmsg" |
180 return 1 | |
0 | 181 } |
182 | |
183 set uexpire [expr [clock seconds] - (7*24*60*60)] | |
184 foreach {ukey udata} [array get entries] { | |
63 | 185 # if {$isentries($ukey) != 0 || [lindex $udata 0] >= $uexpire} { |
186 puts $outfile [join $udata "½"] | |
187 # } | |
0 | 188 } |
189 | |
190 close $outfile | |
191 if {[catch {file rename -force -- $tmpfname $datafile} uerrmsg]} { | |
63 | 192 puts "Error renaming $tmpfname to $datafile: $uerrmsg" |
0 | 193 } |
194 #puts "$newurls new entries." |