Mercurial > hg > egg-tcls
annotate fetch_feeds.tcl @ 424:825cac46b1cb
Cosmetic / stray trailing whitespace cleanup.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sun, 08 Jan 2017 03:55:55 +0200 |
parents | 44c9128097cd |
children | e5810c52d376 |
rev | line source |
---|---|
0 | 1 #!/usr/bin/tclsh |
1 | 2 # |
3 # NOTICE! Change the path above to the correct tclsh binary path! | |
4 # | |
268
96310b1c88fa
feeds: Improve config resiliency.
Matti Hamalainen <ccr@tnsp.org>
parents:
265
diff
changeset
|
5 ############################################################################## |
0 | 6 # |
323 | 7 # FeedCheck fetcher v1.0 by Matti 'ccr' Hamalainen <ccr@tnsp.org> |
265
908edc54005a
feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents:
159
diff
changeset
|
8 # (C) Copyright 2008-2015 Tecnic Software productions (TNSP) |
0 | 9 # |
10 # This script is freely distributable under GNU GPL (version 2) license. | |
11 # | |
268
96310b1c88fa
feeds: Improve config resiliency.
Matti Hamalainen <ccr@tnsp.org>
parents:
265
diff
changeset
|
12 ############################################################################## |
0 | 13 |
265
908edc54005a
feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents:
159
diff
changeset
|
14 ### The configuration should be in config.feeds in same directory |
908edc54005a
feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents:
159
diff
changeset
|
15 ### as this script. Or change the line below to point wherever |
908edc54005a
feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents:
159
diff
changeset
|
16 ### you wish. See "config.feeds.example" for an example config file. |
908edc54005a
feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents:
159
diff
changeset
|
17 source [file dirname [info script]]/config.feeds |
0 | 18 |
422
880a07485275
Add utl_ctime() to utillib and use it elsewhere.
Matti Hamalainen <ccr@tnsp.org>
parents:
350
diff
changeset
|
19 ### Required utillib.tcl |
880a07485275
Add utl_ctime() to utillib and use it elsewhere.
Matti Hamalainen <ccr@tnsp.org>
parents:
350
diff
changeset
|
20 source [file dirname [info script]]/utillib.tcl |
880a07485275
Add utl_ctime() to utillib and use it elsewhere.
Matti Hamalainen <ccr@tnsp.org>
parents:
350
diff
changeset
|
21 |
0 | 22 |
268
96310b1c88fa
feeds: Improve config resiliency.
Matti Hamalainen <ccr@tnsp.org>
parents:
265
diff
changeset
|
23 ############################################################################## |
139
3305e142eecc
Change feed fetcher to use SQLite3 backend.
Matti Hamalainen <ccr@tnsp.org>
parents:
114
diff
changeset
|
24 |
423
44c9128097cd
feeds: Remember to require sqlite3 package.
Matti Hamalainen <ccr@tnsp.org>
parents:
422
diff
changeset
|
25 package require sqlite3 |
0 | 26 package require http |
271 | 27 |
265
908edc54005a
feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents:
159
diff
changeset
|
### HTTP client setup, driven by optional variables from the config file.

# User-Agent: fall back to a browser-like string when http_user_agent is
# missing or empty, otherwise send the configured value.
if {![info exists http_user_agent] || $http_user_agent == ""} {
  ::http::config -urlencoding utf8 -useragent "Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0"
} else {
  ::http::config -urlencoding utf8 -useragent $http_user_agent
}

# Proxy: only configured when http_use_proxy exists and is non-zero.
# http_proxy_host and http_proxy_port are read unconditionally in that case.
if {[info exists http_use_proxy]} {
  if {$http_use_proxy != 0} {
    ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
  }
}

# HTTPS: the tls package is loaded and registered for https only when
# http_tls_support exists and is non-zero; http_tls_cadir supplies -cadir.
if {[info exists http_tls_support]} {
  if {$http_tls_support != 0} {
    package require tls
    ::http::register https 443 [list ::tls::socket -request 1 -require 1 -tls1 1 -cadir $http_tls_cadir]
  }
}
908edc54005a
feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents:
159
diff
changeset
|
42 |
0 | 43 |
268
96310b1c88fa
feeds: Improve config resiliency.
Matti Hamalainen <ccr@tnsp.org>
parents:
265
diff
changeset
|
44 ############################################################################## |
96310b1c88fa
feeds: Improve config resiliency.
Matti Hamalainen <ccr@tnsp.org>
parents:
265
diff
changeset
|
45 |
321
d8b957796121
feeds: Refactor the feeds fetching.
Matti Hamalainen <ccr@tnsp.org>
parents:
296
diff
changeset
|
# Perform a blocking HTTP GET of urlStr and hand the result back to the caller.
#
# Arguments:
#   urlStr    - URL to fetch.
#   urlStatus - name of caller variable receiving the ::http::status value.
#   urlSCode  - name of caller variable receiving the ::http::code status line.
#   urlCode   - name of caller variable receiving the numeric ::http::ncode.
#   urlData   - name of caller variable receiving the response body.
#   urlMeta   - name of caller array receiving the ::http::meta headers.
#
# Returns 1 when the transaction finished with status "ok", 0 otherwise.
# On failure a diagnostic is printed to stdout; urlStatus is still set when
# the request got far enough to have a status.
#
# The HTTP token is released on every path once it has been created, so
# failed or timed-out requests no longer leave state arrays behind.
proc fetch_dorequest { urlStr urlStatus urlSCode urlCode urlData urlMeta } {
  upvar 1 $urlStatus ustatus
  upvar 1 $urlSCode uscode
  upvar 1 $urlCode ucode
  upvar 1 $urlData udata
  upvar 1 $urlMeta umeta

  if {[catch {set utoken [::http::geturl $urlStr -timeout 6000 -binary 1 -headers {Accept-Encoding identity}]} uerrmsg]} {
    puts "HTTP request failed: $uerrmsg"
    return 0
  }

  set ustatus [::http::status $utoken]
  if {$ustatus ne "ok"} {
    if {$ustatus eq "timeout"} {
      puts "HTTP request timed out ($urlStr)"
    } else {
      # The error text must be read before the token is cleaned up.
      puts "Error in HTTP transaction: [::http::error $utoken] ($urlStr)"
    }
    ::http::cleanup $utoken
    return 0
  }

  set uscode [::http::code $utoken]
  set ucode [::http::ncode $utoken]
  set udata [::http::data $utoken]
  array set umeta [::http::meta $utoken]
  ::http::cleanup $utoken

  return 1
}
d8b957796121
feeds: Refactor the feeds fetching.
Matti Hamalainen <ccr@tnsp.org>
parents:
296
diff
changeset
|
78 |
d8b957796121
feeds: Refactor the feeds fetching.
Matti Hamalainen <ccr@tnsp.org>
parents:
296
diff
changeset
|
79 |
139
3305e142eecc
Change feed fetcher to use SQLite3 backend.
Matti Hamalainen <ccr@tnsp.org>
parents:
114
diff
changeset
|
# Record one feed item in the database unless it is already stored.
#
# Arguments:
#   uname   - feed name, stored in the "feed" column.
#   uprefix - prefix prepended to links that are not absolute http(s) URLs.
#   uurl    - item link as scraped; HTML entities are converted first.
#   utitle  - item title as scraped; HTML entities are converted on insert.
#
# Uses the globals currclock (stored as utime) and nitems (incremented for
# every item that was not yet present). A failing INSERT terminates the
# script with exit code 15.
proc add_entry {uname uprefix uurl utitle} {
  global currclock feeds_db nitems

  # Build the final URL: absolute links are used as-is, everything else is
  # glued to the prefix, adding a slash only when neither side provides one.
  set link [utl_convert_html_ent $uurl]
  if {[string match "http://*" $link] || [string match "https://*" $link]} {
    set fullurl "$link"
  } elseif {[string range $uprefix end end] != "/" && [string range $link 0 0] != "/"} {
    set fullurl "$uprefix/$link"
  } else {
    set fullurl "$uprefix$link"
  }

  # Already known for this feed? Then there is nothing to do.
  set query "SELECT title FROM feeds WHERE url='[utl_escape $fullurl]' AND feed='[utl_escape $uname]'"
  if {[feeds_db exists $query]} {
    return
  }

  set query "INSERT INTO feeds (feed,utime,url,title) VALUES ('[utl_escape $uname]', $currclock, '[utl_escape $fullurl]', '[utl_escape [utl_convert_html_ent $utitle]]')"
  incr nitems
  if {[catch {feeds_db eval $query} failure]} {
    puts "\nError: $failure on:\n$query"
    exit 15
  }
}
104 | |
105 | |
# Fetch an RSS document and store its items via add_entry.
#
# Arguments:
#   datauri    - URL of the RSS document.
#   dataname   - feed name passed on to add_entry.
#   dataprefix - URL prefix passed on to add_entry.
#
# Returns 1 when the document could not be fetched, 0 otherwise (also when
# no item matched).
#
# The download goes through fetch_dorequest, like the other fetchers, so
# timeouts and failed transfers are reported instead of being parsed as an
# empty page.
proc add_rss_feed {datauri dataname dataprefix} {
  if {![fetch_dorequest $datauri ustatus uscode ucode upage umeta]} {
    return 1
  }

  # Item patterns, tried in order; the first one that yields any match wins.
  # Each captures the title first and the http(s) link second:
  #   1. <item> with a CDATA-wrapped title
  #   2. <item> with a plain title
  #   3. <item ...> carrying attributes, with a plain title
  set patterns [list \
    {<item>.*?<title><..CDATA.(.*?)\]\]></title>.*?<link>(http.*?)</link>.*?</item>} \
    {<item>.*?<title>(.*?)</title>.*?<link>(http.*?)</link>.*?</item>} \
    {<item [^>]*>.*?<title>(.*?)</title>.*?<link>(http.*?)</link>.*?</item>}]

  foreach pattern $patterns {
    set umatches [regexp -all -nocase -inline -- $pattern $upage]
    if {[llength $umatches] == 0} {
      continue
    }
    # -inline yields triples: whole match, title, link.
    foreach {whole title link} $umatches {
      add_entry $dataname $dataprefix $link $title
    }
    break
  }

  return 0
}
138 | |
139 | |
140 ############################################################################## | |
69
df3230f8aa46
Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents:
63
diff
changeset
|
141 ### Fetch and parse Halla-aho's blog page data |
321
d8b957796121
feeds: Refactor the feeds fetching.
Matti Hamalainen <ccr@tnsp.org>
parents:
296
diff
changeset
|
# Scrape the article index at http://www.halla-aho.com/scripta/ and store
# every linked .html page as an item of the "Mestari" feed.
#
# Returns 0 when the page could not be fetched, 1 otherwise.
#
# The patterns are brace-quoted so that "\." reaches the regexp engine as a
# literal dot; in the earlier double-quoted form Tcl reduced it to a bare
# ".", which matched any character in front of "html".
proc fetch_halla_aho { } {
  set datauri "http://www.halla-aho.com/scripta/"
  set dataname "Mestari"
  if {![fetch_dorequest $datauri ustatus uscode ucode upage umeta]} {
    return 0
  }

  # Links whose text is wrapped in <b>...</b>.
  set umatches [regexp -all -nocase -inline -- {<a href="([^"]+\.html)"><b>([^<]+)</b>} $upage]
  foreach {whole link title} $umatches {
    add_entry $dataname $datauri $link $title
  }

  # Links with plain text: first character is not "<", second is not "b".
  set umatches [regexp -all -nocase -inline -- {<a href="([^"]+\.html)">([^<][^b][^<]+)</a>} $upage]
  foreach {whole link title} $umatches {
    add_entry $dataname $datauri $link $title
  }

  return 1
}
161 | |
162 | |
163 ### The Adventurers | |
321
d8b957796121
feeds: Refactor the feeds fetching.
Matti Hamalainen <ccr@tnsp.org>
parents:
296
diff
changeset
|
# Scrape the chapter sidebar of "The Adventurers" and hand every anchor
# found there to add_entry, using http://www.peldor.com/ as the link prefix.
#
# Returns 0 when the page could not be fetched.
proc fetch_adventurers { } {
  set sidebar_url "http://www.peldor.com/chapters/index_sidebar.html"
  set feed "The Adventurers"

  set fetched [fetch_dorequest $sidebar_url ustatus uscode ucode upage umeta]
  if {!$fetched} {
    return 0
  }

  # regexp -inline returns triples: whole match, href, link text.
  set anchors [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+)\">(\[^<\]+)</a>" $upage]
  foreach {whole href caption} $anchors {
    add_entry $feed "http://www.peldor.com/" $href $caption
  }
}
177 | |
178 | |
179 ### Order of the Stick | |
321
d8b957796121
feeds: Refactor the feeds fetching.
Matti Hamalainen <ccr@tnsp.org>
parents:
296
diff
changeset
|
# Scrape the Order of the Stick comic index and store every comic link as
# an item of the "OOTS" feed, prefixed with http://www.giantitp.com.
#
# Returns 0 when the page could not be fetched, 1 otherwise.
#
# The pattern is brace-quoted so that "\." reaches the regexp engine as a
# literal dot; in the earlier double-quoted form Tcl reduced it to a bare
# ".", which matched any character in front of "html".
proc fetch_oots { } {
  set datauri "http://www.giantitp.com/comics/oots.html"
  set dataname "OOTS"
  if {![fetch_dorequest $datauri ustatus uscode ucode upage umeta]} {
    return 0
  }

  # regexp -inline returns triples: whole match, href, link text.
  set umatches [regexp -all -nocase -inline -- {<a href="(/comics/oots[0-9]+\.html)">([^<]+)</a>} $upage]
  foreach {whole link title} $umatches {
    add_entry $dataname "http://www.giantitp.com" $link $title
  }

  return 1
}
193 | |
194 | |
350
51c08336d7b1
feeds: Add support for Poliisi.fi information reports.
Matti Hamalainen <ccr@tnsp.org>
parents:
323
diff
changeset
|
195 ### Poliisi tiedotteet |
51c08336d7b1
feeds: Add support for Poliisi.fi information reports.
Matti Hamalainen <ccr@tnsp.org>
parents:
323
diff
changeset
|
# Scrape a Poliisi.fi bulletin listing and store each bulletin through
# add_entry, with the title formed as "<date>: <trimmed link text>".
#
# Arguments:
#   datauri    - URL of the listing page.
#   dataname   - feed name passed on to add_entry.
#   dataprefix - URL prefix passed on to add_entry.
#
# Returns 0 when the page could not be fetched.
proc fetch_poliisi { datauri dataname dataprefix } {
  set fetched [fetch_dorequest $datauri ustatus uscode ucode upage umeta]
  if {!$fetched} {
    return 0
  }

  # regexp -inline returns groups of four: whole match, date, href, link text.
  set bulletins [regexp -all -nocase -inline -- "<div class=\"channelitem\"><div class=\"date\">(.*?)</div><a class=\"article\" href=\"(\[^\"\]+)\">(\[^<\]+)</a>" $upage]
  foreach {whole stamp href caption} $bulletins {
    set caption [string trim $caption]
    add_entry $dataname $dataprefix $href "$stamp: $caption"
  }
}
51c08336d7b1
feeds: Add support for Poliisi.fi information reports.
Matti Hamalainen <ccr@tnsp.org>
parents:
323
diff
changeset
|
208 |
51c08336d7b1
feeds: Add support for Poliisi.fi information reports.
Matti Hamalainen <ccr@tnsp.org>
parents:
323
diff
changeset
|
209 |
51c08336d7b1
feeds: Add support for Poliisi.fi information reports.
Matti Hamalainen <ccr@tnsp.org>
parents:
323
diff
changeset
|
210 |
51c08336d7b1
feeds: Add support for Poliisi.fi information reports.
Matti Hamalainen <ccr@tnsp.org>
parents:
323
diff
changeset
|
211 |
321
d8b957796121
feeds: Refactor the feeds fetching.
Matti Hamalainen <ccr@tnsp.org>
parents:
296
diff
changeset
|
### Open database, etc
# nitems counts the items added during this run; currclock is the timestamp
# stored with each of them. Both are read by add_entry as globals.
set nitems 0
set currclock [clock seconds]

# feeds_dbfile is not set anywhere in this script and is expected to come
# from the sourced configuration. Check it like the other optional settings
# so a missing or empty value gives a clear message rather than a raw Tcl
# error.
if {![info exists feeds_dbfile] || $feeds_dbfile eq ""} {
  puts "Database file not configured: feeds_dbfile is not set."
  exit 2
}

if {[catch {sqlite3 feeds_db $feeds_dbfile} uerrmsg]} {
  puts "Could not open SQLite3 database '$feeds_dbfile': $uerrmsg."
  exit 2
}


### Fetch the feeds
fetch_poliisi "http://www.poliisi.fi/oulu/tiedotteet/1/0?all1/0" "Poliisi/Oulu" "http://www.poliisi.fi"

fetch_halla_aho

fetch_adventurers

fetch_oots

#add_rss_feed "http://www.kaleva.fi/rss/145.xml" "Kaleva/Tiede" ""

add_rss_feed "http://www.effi.org/xml/uutiset.rss" "EFFI" ""

add_rss_feed "http://static.mtv3.fi/rss/uutiset_rikos.rss" "MTV3/Rikos" ""

#add_rss_feed "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave" ""

#add_rss_feed "http://lehti.samizdat.info/feed/" "Lehti" ""


### Close database
feeds_db close

puts "$nitems new items."