annotate fetch_feeds.tcl @ 271:f47b41d2be64

feeds: Cosmetics.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 23 Jan 2015 10:57:47 +0200
parents 96310b1c88fa
children 9f90d6918626
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 #!/usr/bin/tclsh
1
bdb2b1fd6601 Add some comments.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
2 #
bdb2b1fd6601 Add some comments.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
3 # NOTICE! Change above path to correct tclsh binary path!
bdb2b1fd6601 Add some comments.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
4 #
268
96310b1c88fa feeds: Improve config resiliency.
Matti Hamalainen <ccr@tnsp.org>
parents: 265
diff changeset
5 ##############################################################################
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
6 #
265
908edc54005a feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents: 159
diff changeset
7 # FeedCheck fetcher v0.9 by Matti 'ccr' Hamalainen <ccr@tnsp.org>
908edc54005a feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents: 159
diff changeset
8 # (C) Copyright 2008-2015 Tecnic Software productions (TNSP)
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
9 #
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
10 # This script is freely distributable under GNU GPL (version 2) license.
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
11 #
268
96310b1c88fa feeds: Improve config resiliency.
Matti Hamalainen <ccr@tnsp.org>
parents: 265
diff changeset
12 ##############################################################################
139
3305e142eecc Change feed fetcher to use SQLite3 backend.
Matti Hamalainen <ccr@tnsp.org>
parents: 114
diff changeset
13 package require sqlite3
265
908edc54005a feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents: 159
diff changeset
14 source [file dirname [info script]]/utillib.tcl
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
15
265
908edc54005a feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents: 159
diff changeset
16 ### The configuration should be in config.feeds in same directory
908edc54005a feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents: 159
diff changeset
17 ### as this script. Or change the line below to point wherever
908edc54005a feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents: 159
diff changeset
18 ### you wish. See "config.feeds.example" for an example config file.
908edc54005a feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents: 159
diff changeset
19 source [file dirname [info script]]/config.feeds
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
20
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
21
268
96310b1c88fa feeds: Improve config resiliency.
Matti Hamalainen <ccr@tnsp.org>
parents: 265
diff changeset
22 ##############################################################################
139
3305e142eecc Change feed fetcher to use SQLite3 backend.
Matti Hamalainen <ccr@tnsp.org>
parents: 114
diff changeset
23
146
7106dd8db4de Improve entity parsing, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 145
diff changeset
### HTML entity translation table.
### The table is written as one "|"-separated string of alternating
### entity / replacement fields and split into a list usable by
### "string map". Because "|" is the field separator it cannot appear as
### a replacement (presumably why "&#124;" is mapped to "-").
set feeds_ent_str "&#45;|-|&#39;|'|—|-|&rlm;||&#8212;|-|&#8211;|--|&#x202a;||&#x202c;|"
append feeds_ent_str "|&lrm;||&aring;|å|&Aring;|Å|&eacute;|é|&#58;|:|&nbsp;| "
append feeds_ent_str "|&#8221;|\"|&#8220;|\"|&laquo;|<<|&raquo;|>>|&quot;|\""
append feeds_ent_str "|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>"
# Decimal references: 228 = a-umlaut, 229 = a-ring, 246 = o-umlaut.
append feeds_ent_str "|&#228;|ä|&#229;|å|&#246;|ö|&mdash;|-|&#039;|'|&ndash;|-|&#034;|\""
append feeds_ent_str "|&#124;|-|&#8217;|'|&uuml;|ü|&Uuml;|Ü|&bull;|*|&euro;|€"
append feeds_ent_str "|&rdquo;|\""
# NOTE(review): the decode step assumes the non-ASCII literals above reach
# this point as raw UTF-8 bytes -- confirm against how tclsh reads the file.
set html_ent [split [encoding convertfrom "utf-8" $feeds_ent_str] "|"]
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
32
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
33 package require http
271
f47b41d2be64 feeds: Cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents: 268
diff changeset
34
265
908edc54005a feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents: 159
diff changeset
### HTTP client setup, driven by optional variables from the config file.

# URL encoding is always iso8859-1; the user agent comes from the
# configuration when one is given, otherwise a built-in default is used.
::http::config -urlencoding iso8859-1
if {![info exists http_user_agent] || $http_user_agent == ""} {
  ::http::config -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5"
} else {
  ::http::config -useragent $http_user_agent
}

# Optional proxy: only touched when http_use_proxy is set and non-zero.
if {[info exists http_use_proxy]} {
  if {$http_use_proxy != 0} {
    ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
  }
}

# Optional HTTPS support: loads the tls package and registers it as the
# socket command for https URLs on port 443.
if {[info exists http_tls_support]} {
  if {$http_tls_support != 0} {
    package require tls
    ::http::register https 443 \
      [list ::tls::socket -request 1 -require 1 -tls1 1 -cadir $http_tls_cadir]
  }
}
908edc54005a feeds: Move configuration to separate file.
Matti Hamalainen <ccr@tnsp.org>
parents: 159
diff changeset
49
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
50
268
96310b1c88fa feeds: Improve config resiliency.
Matti Hamalainen <ccr@tnsp.org>
parents: 265
diff changeset
51 ##############################################################################
96310b1c88fa feeds: Improve config resiliency.
Matti Hamalainen <ccr@tnsp.org>
parents: 265
diff changeset
52
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
# Replace the HTML entities listed in the global html_ent map.
#
#   udata - text to decode
#
# Two passes are made: the first matches entity names exactly as written
# in the map, the second repeats the substitution ignoring case.
# Returns the decoded text.
proc convert_ent {udata} {
  set decoded [string map $::html_ent $udata]
  set decoded [string map -nocase $::html_ent $decoded]
  return $decoded
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
57
139
3305e142eecc Change feed fetcher to use SQLite3 backend.
Matti Hamalainen <ccr@tnsp.org>
parents: 114
diff changeset
# Store one feed item in the "feeds" table unless it is already there.
#
#   uname   - feed name, stored in the "feed" column
#   uprefix - prepended to the link when the link does not start with
#             "http://" or "https://"
#   uurl    - item link as scraped; HTML entities are decoded before use
#   utitle  - item title, stored as given
#
# Reads the global currclock (stored in "utime"), increments the global
# nitems for every insert attempted and uses the feeds_db database command.
# If the INSERT fails the error is printed and the script exits with
# status 15.
proc add_entry {uname uprefix uurl utitle} {
  global currclock feeds_db nitems

  set utmp [convert_ent $uurl]
  if {[string match "http://*" $utmp] || [string match "https://*" $utmp]} {
    set utest $utmp
  } else {
    set utest "$uprefix$utmp"
  }

  # The SQL is kept in braces on purpose: the sqlite3 Tcl interface then
  # sees the $name tokens itself and binds the values of the Tcl variables
  # of this proc as statement parameters. Text scraped from remote pages
  # therefore never becomes part of the SQL source.
  set usql {SELECT title FROM feeds WHERE url=$utest AND feed=$uname}
  if {![feeds_db exists $usql]} {
    set usql {INSERT INTO feeds (feed,utime,url,title) VALUES ($uname, $currclock, $utest, $utitle)}
    incr nitems
    if {[catch {feeds_db eval $usql} uerrmsg]} {
      puts "\nError: $uerrmsg on:\n$usql"
      exit 15
    }
  }
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
77
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
78
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
# Fetch an RSS document and hand every recognised item to add_entry.
#
#   datauri    - URL of the RSS document
#   dataname   - feed name passed on to add_entry
#   dataprefix - URL prefix passed on to add_entry
#
# Returns 1 when the document could not be fetched, 0 otherwise (also
# when the document contained no recognisable items).
proc add_rss_feed {datauri dataname dataprefix} {
  if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
    puts "Error getting $datauri: $uerrmsg"
    return 1
  }

  # A timeout does not raise a Tcl error; it is only visible through the
  # transaction status. Treat every status other than "ok" as a failed
  # fetch instead of parsing an empty or truncated body.
  set ustatus [::http::status $utoken]
  if {$ustatus ne "ok"} {
    ::http::cleanup $utoken
    puts "Error getting $datauri: $ustatus"
    return 1
  }

  set upage [::http::data $utoken]
  ::http::cleanup $utoken

  # Item layouts, tried in this order until one of them yields matches:
  #   1. <item> with the title wrapped in a CDATA section
  #   2. <item> with a plain title
  #   3. <item ...> carrying attributes, with a plain title
  set upatterns [list \
    "<item>.\*\?<title><..CDATA.(.\*\?)\\\]\\\]></title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" \
    "<item>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>" \
    "<item \[^>\]*>.\*\?<title>(.\*\?)</title>.\*\?<link>(http.\*\?)</link>.\*\?</item>"]

  foreach upattern $upatterns {
    set umatches [regexp -all -nocase -inline -- $upattern $upage]
    # Every match contributes three list elements: whole match, title, link.
    foreach {uwhole utitle ulink} $umatches {
      add_entry $dataname $dataprefix $ulink $utitle
    }
    if {[llength $umatches] > 0} {
      break
    }
  }

  return 0
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
111
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
112
139
3305e142eecc Change feed fetcher to use SQLite3 backend.
Matti Hamalainen <ccr@tnsp.org>
parents: 114
diff changeset
### Initialise the run counters and open the feed database.
# currclock is the timestamp of this run, nitems counts items added.
set currclock [clock seconds]
set nitems 0

# Creates the feeds_db database command; on failure report and exit 2.
if {[catch {sqlite3 feeds_db $feeds_dbfile} uerrmsg] != 0} {
  puts "Could not open SQLite3 database '$feeds_dbfile': $uerrmsg."
  exit 2
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
121
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
122
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
123 ##############################################################################
69
df3230f8aa46 Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents: 63
diff changeset
### Fetch and parse Halla-aho's blog page data
set datauri "http://www.halla-aho.com/scripta/";
set dataname "Mestari"
if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
  puts "Error getting $datauri: $uerrmsg"
} else {
  set upage [::http::data $utoken]
  ::http::cleanup $utoken

  # Links to .html pages whose link text is wrapped in <b>...</b>.
  # The dot is written as "\\." so that the regular expression receives
  # "\." (a literal dot); a single backslash would be consumed by Tcl's
  # own substitution inside the double quotes.
  set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\\.html)\"><b>(\[^<\]+)</b>" $upage]
  # Every match contributes three list elements: whole match, link, title.
  foreach {uwhole ulink utitle} $umatches {
    add_entry $dataname $datauri $ulink $utitle
  }

  # Links to .html pages with plain link text: the first character must
  # not be "<" and the second must not be "b" (presumably to skip the
  # bold links already handled above).
  set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+\\.html)\">(\[^<\]\[^b\]\[^<\]+)</a>" $upage]
  foreach {uwhole ulink utitle} $umatches {
    add_entry $dataname $datauri $ulink $utitle
  }
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
145
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
146
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
### The Adventurers
# Scrape the chapter sidebar: every <a href="...">text</a> becomes an
# entry, with links resolved against the site root.
set dataname "The Adventurers"
set datauri "http://www.peldor.com/chapters/index_sidebar.html"
if {![catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
  set upage [::http::data $utoken]
  ::http::cleanup $utoken

  set umatches [regexp -all -nocase -inline -- "<a href=\"(\[^\"\]+)\">(\[^<\]+)</a>" $upage]
  set nmatches [llength $umatches]

  # The match list holds triples: whole match, link, title.
  set n 0
  while {$n < $nmatches} {
    set ulink [lindex $umatches [expr {$n + 1}]]
    set utitle [lindex $umatches [expr {$n + 2}]]
    add_entry $dataname "http://www.peldor.com/" $ulink $utitle
    incr n 3
  }
} else {
  puts "Error getting $datauri: $uerrmsg"
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
162
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
163
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
### Order of the Stick
set datauri "http://www.giantitp.com/comics/oots.html";
set dataname "OOTS"
if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
  puts "Error getting $datauri: $uerrmsg"
} else {
  set upage [::http::data $utoken]
  ::http::cleanup $utoken

  # Links of the form /comics/oots<digits>.html. The dot is written as
  # "\\." so that the regular expression receives "\." (a literal dot);
  # a single backslash would be consumed by Tcl's own substitution inside
  # the double quotes.
  set umatches [regexp -all -nocase -inline -- "<a href=\"(/comics/oots\[0-9\]+\\.html)\">(\[^<\]+)</a>" $upage]
  # Every match contributes three list elements: whole match, link, title.
  foreach {uwhole ulink utitle} $umatches {
    add_entry $dataname "http://www.giantitp.com" $ulink $utitle
  }
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
179
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
180
69
df3230f8aa46 Translate some comments to english and cosmetic fixes.
Matti Hamalainen <ccr@tnsp.org>
parents: 63
diff changeset
### Generic RSS-feed fetching
# Each pair is a feed URL followed by the feed name; every feed is
# fetched with an empty URL prefix.
# Disabled feeds:
#add_rss_feed "http://www.kaleva.fi/rss/145.xml" "Kaleva/Tiede" ""
#add_rss_feed "http://lehti.samizdat.info/feed/" "Lehti" ""
foreach {feed_url feed_name} {
  "http://www.effi.org/xml/uutiset.rss" "EFFI"
  "http://static.mtv3.fi/rss/uutiset_rikos.rss" "MTV3/Rikos"
  "http://www.blastwave-comic.com/rss/blastwave.xml" "Blastwave"
} {
  add_rss_feed $feed_url $feed_name ""
}


### Close database and report how many items were added during this run
feeds_db close

puts "$nitems new items."