annotate hae_ruoka.tcl @ 0:1c4e2814cd41

Initial import.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 21 Sep 2010 13:12:49 +0300
parents
children bdb2b1fd6601
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 #!/usr/bin/tclsh
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
2 ##########################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
3 #
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
4 # RuokaLista fetcher v1.0 by ccr/TNSP <ccr@tnsp.org>
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
5 # (C) Copyright 2010 Tecnic Software productions (TNSP)
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
6 #
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
7 ##########################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
8
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
9 # Datatiedosto, oltava sama kuin ruoka.tcl:n vastaava asetus
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
10 set datafile "/home/niinuska/bot/data.ruoka"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
11
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
12 # Käytä HTTP proxya? 1 = kyllä, 0 = ei
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
13 set http_proxy 0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
14
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
15 # HTTP proxyn osoite ja portti
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
16 set http_proxy_host "cache.inet.fi"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
17 set http_proxy_port 800
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
18
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
19
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
20 ##############################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
21 set html_ent [split "\n| |\r| |<br />| |&nbsp;| |&#160;| |&raquo;|>>|&quot;|\"|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>|ä|ä|ö|ö|Ä|Ä" "|"]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
22
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
23 package require http
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
24 ::http::config -urlencoding iso8859-1 -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
25 if {$http_proxy != 0} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
26 ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
27 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
28
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
29
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
30 proc convert_ent {udata} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
31 global html_ent
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
32 return [string map $html_ent $udata]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
33 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
34
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
35
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
36 proc add_entry {uname uday udate udesc} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
37 global entries
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
38 set utest "$uname:$udate"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
39 if {[catch {set utmp $entries($utest)}]} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
40 set entries($utest) [list $uname $uday $udate $udesc]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
41 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
42 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
43
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
44
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
45 proc add_amica {datauri dataname} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
46 if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
47 puts "Error getting $datauri: $uerrmsg"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
48 return 1
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
49 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
50
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
51 set upage [::http::data $utoken]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
52 ::http::cleanup $utoken
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
53
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
54 set nmatches 1
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
55 while {$nmatches > 0} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
56 set umatches [regexp -nocase -inline -- "<strong>(Maanantai|Tiistai|Keskiviikko|Torstai|Perjantai|Lauantai|Sunnuntai)</strong></td>.?.?<td colspan=\"2\"><strong>(\[^<\]+)</strong></td>(.*)\$" $upage]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
57 set nmatches [llength $umatches]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
58 if {$nmatches > 3} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
59 set umat [regexp -nocase -inline -- "^(.+?)(<td colspan=\"3\">|</tbody>)" [lindex $umatches 3]]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
60 set umat [regexp -all -nocase -inline -- "<td colspan=\"\[78\]\">(.\*\?)</td>" [lindex $umat 1]]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
61 set tmp ""
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
62 foreach {ukey udata} $umat {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
63 set item [string trim [convert_ent $udata]]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
64 if {[string length $item] > 0} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
65 lappend tmp $item
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
66 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
67 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
68 add_entry $dataname [lindex $umatches 1] [lindex $umatches 2] [join $tmp "; "]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
69 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
70 set upage [lindex $umatches 3]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
71 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
72
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
73 return 0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
74 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
75
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
76 proc add_uniresta {datauri dataname} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
77 if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
78 puts "Error getting $datauri: $uerrmsg"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
79 return 1
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
80 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
81
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
82 set upage [::http::data $utoken]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
83 ::http::cleanup $utoken
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
84
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
85 set nmatches 1
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
86 while {$nmatches > 0} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
87 set umatches [regexp -nocase -inline -- "<span class='otsikko'>(Maanantai|Tiistai|Keskiviikko|Torstai|Perjantai|Lauantai|Sunnuntai) +(\[^<\]+)</span>(.*)\$" $upage]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
88 set nmatches [llength $umatches]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
89 if {$nmatches > 3} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
90 set umat [regexp -nocase -inline -- "^(.+?)<br /><br /><br />" [lindex $umatches 3]]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
91 set umat [regexp -all -nocase -inline -- "(.\*\?)<br>" [lindex $umat 1]]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
92 set tmp ""
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
93 foreach {ukey udata} $umat {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
94 set item [string trim [convert_ent $udata]]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
95 if {[string length $item] > 0} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
96 lappend tmp $item
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
97 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
98 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
99 add_entry $dataname [lindex $umatches 1] [lindex $umatches 2] [join $tmp "; "]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
100 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
101 set upage [lindex $umatches 3]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
102 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
103
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
104 return 0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
105 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
106
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
107
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
108 ##############################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
109 # Amica/OAMK tekniikan yksikkö
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
110 add_amica "http://www.amica.fi/kotkanpoika" "OAMK"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
111
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
112 # Oulun yliopiston Unirestat
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
113 add_uniresta "http://www.uniresta.fi/uniresta.php?ruokalista=2" "Aularavintola"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
114 add_uniresta "http://www.uniresta.fi/uniresta.php?ruokalista=3" "Discus"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
115 add_uniresta "http://www.uniresta.fi/uniresta.php?ruokalista=4" "Julinia"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
116 add_uniresta "http://www.uniresta.fi/uniresta.php?ruokalista=5" "Kastari"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
117 add_uniresta "http://www.uniresta.fi/uniresta.php?ruokalista=6" "Snellmania"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
118 add_uniresta "http://www.uniresta.fi/uniresta.php?ruokalista=7" "Pruxis"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
119 add_uniresta "http://www.uniresta.fi/uniresta.php?ruokalista=10" "Vanilla"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
120 add_uniresta "http://www.uniresta.fi/uniresta.php?ruokalista=11" "Minttu"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
121
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
122
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
123 ##############################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
124 ### Open result datafile and save data
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
125 set tmpfname "$datafile.tmp"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
126 if {[catch {set outfile [open $tmpfname w 0600]} uerrmsg]} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
127 puts "Error opening $tmpfname for writing: $uerrmsg"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
128 return 1
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
129 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
130
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
131 foreach {ukey udata} [array get entries] {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
132 puts $outfile [join $udata "½"]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
133 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
134
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
135 close $outfile
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
136 if {[catch {file rename -force -- $tmpfname $datafile} uerrmsg]} {
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
137 puts "Error renaming $tmpfname to $datafile: $uerrmsg"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
138 }