comparison fetch_weather.pl @ 164:d5a0e4248f3e

weather: Implement configuration file support for weather fetcher backend.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 02 Jun 2014 20:18:59 +0300
parents 4f8a163b2bc1
children c8d5f3a7c4b7
comparison
equal deleted inserted replaced
163:bee5cd89d41c 164:d5a0e4248f3e
6 use Text::Iconv; 6 use Text::Iconv;
7 use Date::Parse; 7 use Date::Parse;
8 use Data::Dumper; 8 use Data::Dumper;
9 9
10 ### 10 ###
11 ### Settings 11 ### Globals
12 ### 12 ###
13 my $opt_outfile = "/home/niinuska/bot/weather.data"; 13 my %settings = (
14 "opt_fmi" => 0,
15 "opt_tiehallinto" => 0,
16 "fmi_api_key" => "",
17 "outfile" => "",
18 );
14 19
15 ### 20 ###
16 ### Helper functions 21 ### Helper functions
17 ### 22 ###
23 sub mlog($)
24 {
25 print STDERR $_[0];
26 }
27
28
18 sub str_trim($) 29 sub str_trim($)
19 { 30 {
20 my $str = $_[0]; 31 my $str = $_[0];
21 if (defined($str)) { 32 if (defined($str)) {
22 $str =~ s/^\s*//; 33 $str =~ s/^\s*//;
181 return str2time($_[0]) - str2time("00:00") + $_[1]; 192 return str2time($_[0]) - str2time("00:00") + $_[1];
182 } 193 }
183 194
184 195
185 ### 196 ###
197 ### Configuration
198 ###
199 sub opt_chk_bool($)
200 {
201 if (defined($settings{$_[0]}))
202 {
203 my $val = $settings{$_[0]};
204 return ($val == 1 || $val eq "true" || $val eq "on" || $val eq "1");
205 }
206 else
207 {
208 return 0;
209 }
210 }
211
212
213 sub opt_chk_valid($$)
214 {
215 if (defined($settings{$_[0]}))
216 {
217 my $val = $settings{$_[0]};
218 return length($val) >= $_[1];
219 }
220 else
221 {
222 return 0;
223 }
224 }
225
226
227 sub opt_get($)
228 {
229 if (defined($settings{$_[0]}))
230 {
231 return $settings{$_[0]};
232 }
233 else
234 {
235 return undef;
236 }
237 }
238
239
240 sub opt_read_config($)
241 {
242 my $filename = $_[0];
243 my $errors = 0;
244 my $line = 0;
245
246 open(CONFFILE, "<", $filename) or die("Could not open configuration '".$filename."'!\n");
247 while (<CONFFILE>)
248 {
249 $line++;
250 chomp;
251 if (/(^\s*#|^\s*$)/) {
252 # Ignore comments and empty lines
253 } elsif (/^\s*\"?([a-zA-Z0-9_]+)\"?\s*=>?\s*(\d+),?\s*$/) {
254 my $key = lc($1);
255 my $value = $2;
256 if (defined($settings{$key})) {
257 $settings{$key} = $value;
258 } else {
259 mlog("[$filename:$line] Unknown setting '$key' = $value\n");
260 $errors = 1;
261 }
262 } elsif (/^\s*\"?([a-zA-Z0-9_]+)\"?\s*=>?\s*\"(.*?)\",?\s*$/) {
263 my $key = lc($1);
264 my $value = $2;
265 if (defined($settings{$key})) {
266 $settings{$key} = $value;
267 } else {
268 mlog("[$filename:$line] Unknown setting '$key' = '$value'\n");
269 $errors = 1;
270 }
271 } else {
272 mlog("[$filename:$line] Syntax error: $_\n");
273 $errors = 1;
274 }
275 }
276 close(CONFFILE);
277 return $errors;
278 }
279
280
281 ###
186 ### Main program begins 282 ### Main program begins
187 ### 283 ###
188 my $weatherdata = {}; 284 my $weatherdata = {};
189 285
286 die(
287 "Weather Fetch v0.1 by ccr/TNSP <ccr\@tnsp.org>\n".
288 "Usage: $0 <config file>\n"
289 ) unless scalar(@ARGV) >= 1;
290
291 opt_read_config(shift) == 0 or exit;
292
293
190 ### Fetch tiehallinto data 294 ### Fetch tiehallinto data
191 for (my $i = 1; $i <= 22; $i++) 295 if (opt_chk_bool("opt_tiehallinto"))
192 { 296 {
193 my $res = fetch_http("http://alk.tiehallinto.fi/alk/tiesaa/tiesaa_maak_".$i.".html"); 297 for (my $i = 1; $i <= 22; $i++)
298 {
299 my $res = fetch_http("http://alk.tiehallinto.fi/alk/tiesaa/tiesaa_maak_".$i.".html");
300 if ($res->code >= 200 && $res->code <= 201)
301 {
302 my $data = $res->decoded_content;
303
304 # Filter out crap tags we don't want or need
305 $data =~ s/\n/§/g;
306 $data =~ s/<!--.*?-->//ig;
307 $data =~ s/<map[^>]*>.*?<\/map>//ig;
308 $data =~ s/<form[^>]*>.*?<\/form>//ig;
309 $data =~ s/<script[^>]*>.*?<\/script>//ig;
310 $data =~ s/<meta[^>]*>//ig;
311 $data =~ s/<font[^>]*>//ig;
312 $data =~ s/<\/font>//ig;
313 $data =~ s/<span[^>]*>//ig;
314 $data =~ s/<\/span>//ig;
315 $data =~ s/<\/?b>//ig;
316
317 $data =~ s/<br>//ig;
318 $data =~ s/&nbsp;/ /ig;
319 $data =~ s/§/\n/g;
320
321 # Parse the HTML mess
322 my $otree = parse_html($data);
323
324 # Find our desired element nodes
325 my $odata = fnodea(fnode($otree, "body"), "div", "class=elementc");
326 my $oupdate = fnode($odata, "p");
327 my $time_base = str2time("00:00");
328 if ($oupdate) {
329 my $tmp = $oupdate->{"nodes"}[0]{"text"};
330 if ($tmp =~ /:\s+(\d\d\.\d\d\.\d\d\d\d)\s+(\d\d:\d\d)/) {
331 $time_base = str2time($1);
332 }
333 }
334
335 my $oelems = fnode($odata, "table")->{"nodes"};
336 for (my $n = 1; $n < scalar(@$oelems); $n++)
337 {
338 my $fdata = @$oelems[$n]->{"nodes"};
339 $weatherdata->{$fdata->[0]{"nodes"}[0]{"text"}} =
340 [
341 parse_timestamp(get_node($fdata, "text", 1), $time_base),
342 get_node($fdata, "text", 2),
343 get_node($fdata, "text", 3),
344 get_node($fdata, "text", 4),
345 get_node($fdata, "text", 5),
346 ];
347 }
348 }
349 }
350 }
351
352 ### Fetch FMI data
353 if (opt_chk_bool("opt_fmi"))
354 {
355 die("FMI data scrape enabled, but no API key set.\n") unless opt_chk_valid("fmi_api_key", 10);
356
357 my $res = fetch_http("http://data.fmi.fi/fmi-apikey/".opt_get("fmi_api_key").
358 "/wfs?request=getFeature&storedquery_id=fmi::observations::weather::cities::multipointcoverage".
359 "&parameters=temperature,humidity");
360
194 if ($res->code >= 200 && $res->code <= 201) 361 if ($res->code >= 200 && $res->code <= 201)
195 { 362 {
196 my $data = $res->decoded_content; 363 my $xml = XMLin($res->decoded_content,
197 364 KeyAttr => { server => 'name' },
198 # Filter out crap tags we don't want or need 365 ForceArray => [ 'server', 'address' ]);
199 $data =~ s/\n/§/g; 366 }
200 $data =~ s/<!--.*?-->//ig; 367 }
201 $data =~ s/<map[^>]*>.*?<\/map>//ig; 368
202 $data =~ s/<form[^>]*>.*?<\/form>//ig;
203 $data =~ s/<script[^>]*>.*?<\/script>//ig;
204 $data =~ s/<meta[^>]*>//ig;
205 $data =~ s/<font[^>]*>//ig;
206 $data =~ s/<\/font>//ig;
207 $data =~ s/<span[^>]*>//ig;
208 $data =~ s/<\/span>//ig;
209 $data =~ s/<\/?b>//ig;
210
211 $data =~ s/<br>//ig;
212 $data =~ s/&nbsp;/ /ig;
213 $data =~ s/§/\n/g;
214
215 # Parse the HTML mess
216 my $otree = parse_html($data);
217
218 # Find our desired element nodes
219 my $odata = fnodea(fnode($otree, "body"), "div", "class=elementc");
220 my $oupdate = fnode($odata, "p");
221 my $time_base = str2time("00:00");
222 if ($oupdate) {
223 my $tmp = $oupdate->{"nodes"}[0]{"text"};
224 if ($tmp =~ /:\s+(\d\d\.\d\d\.\d\d\d\d)\s+(\d\d:\d\d)/) {
225 $time_base = str2time($1);
226 }
227 }
228
229 my $oelems = fnode($odata, "table")->{"nodes"};
230 for (my $n = 1; $n < scalar(@$oelems); $n++)
231 {
232 my $fdata = @$oelems[$n]->{"nodes"};
233 $weatherdata->{$fdata->[0]{"nodes"}[0]{"text"}} =
234 [
235 parse_timestamp(get_node($fdata, "text", 1), $time_base),
236 get_node($fdata, "text", 2),
237 get_node($fdata, "text", 3),
238 get_node($fdata, "text", 4),
239 get_node($fdata, "text", 5),
240 ];
241 }
242 }
243 }
244
245 ### Fetch FMI data
246 my $res = fetch_http("http://data.fmi.fi/fmi-apikey/".$opt_api_key.
247 "/wfs?request=getFeature&storedquery_id=fmi::observations::weather::cities::multipointcoverage".
248 "&parameters=temperature,humidity");
249
250 if ($res->code >= 200 && $res->code <= 201)
251 {
252 my $xml = XMLin($res->decoded_content,
253 KeyAttr => { server => 'name' },
254 ForceArray => [ 'server', 'address' ]);
255 }
256 369
257 ### Output 370 ### Output
258 if (defined($opt_outfile)) { 371 if (opt_chk_valid("outfile", 1)) {
259 open(STDOUT, '>', $opt_outfile) or die("Could not open output file '$opt_outfile'.\n"); 372 open(STDOUT, '>', opt_get("outfile")) or die("Could not open output file '".opt_get("outfile")."'.\n");
260 } 373 }
261 374
262 binmode STDOUT, ':encoding(utf-8)'; 375 binmode STDOUT, ':encoding(utf-8)';
263 376
264 foreach my $key (sort { $a cmp $b } keys %$weatherdata) 377 foreach my $key (sort { $a cmp $b } keys %$weatherdata)