Mercurial > hg > egg-tcls
comparison fetch_weather.pl @ 178:9b8ec700ede4
Clean up the weather data parser backend a bit.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 03 Jun 2014 15:10:47 +0300 |
parents | e4faa3746869 |
children | 24f9f38210fe |
comparison
equal
deleted
inserted
replaced
176:eda776bcb7ed | 178:9b8ec700ede4 |
---|---|
17 "opt_tiehallinto" => 0, | 17 "opt_tiehallinto" => 0, |
18 "fmi_api_key" => "", | 18 "fmi_api_key" => "", |
19 "outfile" => "", | 19 "outfile" => "", |
20 ); | 20 ); |
21 | 21 |
22 | |
22 ### | 23 ### |
23 ### Helper functions | 24 ### Helper functions |
24 ### | 25 ### |
25 sub mlog($) | 26 sub mlog($) |
26 { | 27 { |
27 print STDERR $_[0]; | 28 print STDERR $_[0]; |
29 } | |
30 | |
31 | |
32 sub fetch_http($) | |
33 { | |
34 my $tmpAgent = LWP::UserAgent->new; | |
35 $tmpAgent->agent("Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 6.0) Opera 10.63 [en]"); | |
36 $tmpAgent->timeout(10); | |
37 | |
38 my $tmpRequest = HTTP::Request->new(GET => $_[0]); | |
39 | |
40 return $tmpAgent->request($tmpRequest); | |
41 } | |
42 | |
43 | |
44 sub parse_timestamp($$) | |
45 { | |
46 return str2time($_[0]) - str2time("00:00") + $_[1]; | |
28 } | 47 } |
29 | 48 |
30 | 49 |
31 sub str_trim($) | 50 sub str_trim($) |
32 { | 51 { |
37 } | 56 } |
38 return $str; | 57 return $str; |
39 } | 58 } |
40 | 59 |
41 | 60 |
61 ### | |
62 ### Loose HTML parser | |
63 ### | |
42 sub pop_token_a($) | 64 sub pop_token_a($) |
43 { | 65 { |
44 my $tokens = $_[0]; | 66 my $tokens = $_[0]; |
45 return shift(@$tokens); | 67 return shift(@$tokens); |
46 } | 68 } |
157 } | 179 } |
158 return undef; | 180 return undef; |
159 } | 181 } |
160 | 182 |
161 | 183 |
184 ### | |
185 ### Helper functions for locating/traversing nodes | |
186 ### in the parsed data tree structure. | |
187 ### | |
162 sub fnode($$) | 188 sub fnode($$) |
163 { | 189 { |
164 return html_find_node($_[0], $_[1], ""); | 190 return html_find_node($_[0], $_[1], ""); |
165 } | 191 } |
166 | 192 |
169 { | 195 { |
170 return html_find_node($_[0], $_[1], $_[2]); | 196 return html_find_node($_[0], $_[1], $_[2]); |
171 } | 197 } |
172 | 198 |
173 | 199 |
174 sub fetch_http($) | |
175 { | |
176 my $tmpAgent = LWP::UserAgent->new; | |
177 $tmpAgent->agent("Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 6.0) Opera 10.63 [en]"); | |
178 $tmpAgent->timeout(10); | |
179 | |
180 my $tmpRequest = HTTP::Request->new(GET => $_[0]); | |
181 | |
182 return $tmpAgent->request($tmpRequest); | |
183 } | |
184 | |
185 | |
186 sub get_node($$$) | 200 sub get_node($$$) |
187 { | 201 { |
188 return defined($_[0]->[$_[2]]{"nodes"}[0]{$_[1]}) ? $_[0]->[$_[2]]{"nodes"}[0]{$_[1]} : ""; | 202 return defined($_[0]->[$_[2]]{"nodes"}[0]{$_[1]}) ? $_[0]->[$_[2]]{"nodes"}[0]{$_[1]} : ""; |
189 } | 203 } |
190 | 204 |
191 | 205 |
192 sub parse_timestamp($$) | 206 ### |
193 { | 207 ### Configuration handling |
194 return str2time($_[0]) - str2time("00:00") + $_[1]; | |
195 } | |
196 | |
197 | |
198 ### | |
199 ### Configuration | |
200 ### | 208 ### |
201 sub opt_chk_bool($) | 209 sub opt_chk_bool($) |
202 { | 210 { |
203 if (defined($settings{$_[0]})) | 211 if (defined($settings{$_[0]})) |
204 { | 212 { |
288 die( | 296 die( |
289 "Weather Fetch v0.1 by ccr/TNSP <ccr\@tnsp.org>\n". | 297 "Weather Fetch v0.1 by ccr/TNSP <ccr\@tnsp.org>\n". |
290 "Usage: $0 <config file>\n" | 298 "Usage: $0 <config file>\n" |
291 ) unless scalar(@ARGV) >= 1; | 299 ) unless scalar(@ARGV) >= 1; |
292 | 300 |
293 opt_read_config(shift) == 0 or exit; | 301 my $cfgfile = shift; |
294 | 302 opt_read_config($cfgfile) == 0 or die("Errors while parsing configuration file '".$cfgfile."'.\n"); |
295 | 303 |
296 ### Fetch tiehallinto data | 304 |
305 ### | |
306 ### Fetch tiehallinto road weather measurement data | |
307 ### | |
297 if (opt_chk_bool("opt_tiehallinto")) | 308 if (opt_chk_bool("opt_tiehallinto")) |
298 { | 309 { |
299 for (my $i = 1; $i <= 22; $i++) | 310 for (my $i = 1; $i <= 22; $i++) |
300 { | 311 { |
301 my $res = fetch_http("http://alk.tiehallinto.fi/alk/tiesaa/tiesaa_maak_".$i.".html"); | 312 my $res = fetch_http("http://alk.tiehallinto.fi/alk/tiesaa/tiesaa_maak_".$i.".html"); |
348 } | 359 } |
349 } | 360 } |
350 } | 361 } |
351 } | 362 } |
352 | 363 |
364 | |
365 ### | |
353 ### Fetch FMI data | 366 ### Fetch FMI data |
367 ### | |
354 if (opt_chk_bool("opt_fmi")) | 368 if (opt_chk_bool("opt_fmi")) |
355 { | 369 { |
356 die("FMI data scrape enabled, but no API key set.\n") unless opt_chk_valid("fmi_api_key", 10); | 370 die("FMI data scrape enabled, but no API key set.\n") unless opt_chk_valid("fmi_api_key", 10); |
357 | 371 |
358 my $res = fetch_http("http://data.fmi.fi/fmi-apikey/".opt_get("fmi_api_key"). | 372 my $res = fetch_http("http://data.fmi.fi/fmi-apikey/".opt_get("fmi_api_key"). |
390 } | 404 } |
391 } | 405 } |
392 } | 406 } |
393 | 407 |
394 | 408 |
409 ### | |
395 ### Output | 410 ### Output |
411 ### | |
396 if (opt_chk_valid("outfile", 1)) { | 412 if (opt_chk_valid("outfile", 1)) { |
397 open(STDOUT, '>', opt_get("outfile")) or die("Could not open output file '".opt_get("outfile")."'.\n"); | 413 open(STDOUT, '>', opt_get("outfile")) or die("Could not open output file '".opt_get("outfile")."'.\n"); |
398 } | 414 } |
399 | 415 |
400 binmode STDOUT, ':encoding(utf-8)'; | 416 binmode STDOUT, ':encoding(utf-8)'; |