# HG changeset patch # User Matti Hamalainen # Date 1443267587 -10800 # Node ID bb4abe5cc235f4281e198e5be9fbeaf07f863659 # Parent e6473e7083aa08a4e8fd3822d46d894616c7771a# Parent bff53f5bba5bcecc0e4e658e42f84aa45ebc6549 Branch merge. diff -r e6473e7083aa -r bb4abe5cc235 fetch_weather.pl --- a/fetch_weather.pl Sat Sep 26 14:39:02 2015 +0300 +++ b/fetch_weather.pl Sat Sep 26 14:39:47 2015 +0300 @@ -1,7 +1,7 @@ #!/usr/bin/perl -w ########################################################################## # -# Fetch Weather v0.3 by Matti 'ccr' Hamalainen +# Fetch Weather v0.4 by Matti 'ccr' Hamalainen # (C) Copyright 2014-2015 Tecnic Software productions (TNSP) # This script is freely distributable under GNU GPL (version 2) license. # @@ -93,171 +93,8 @@ ### -### Loose HTML parser +### Helper functions ### -sub pop_token_a($) -{ - my $tokens = $_[0]; - return shift(@$tokens); -} - - -sub pop_token($) -{ - return str_trim(pop_token_a($_[0])); -} - - -sub parse_html_str($) -{ - my $tokens = $_[0]; - my $token = pop_token($tokens); - my $str = ""; - $token =~ s/^\s*//; - - return undef unless (substr($token, 0, 1) eq '"'); - $token = substr($token, 1); - - while (defined($token)) { - my $tmp = $token; - $tmp =~ s/\s*$//; - if (substr($tmp, -1) eq '"') { - $str .= substr($tmp, 0, -1); - return $str; - } else { - $str .= $token; - } - $token = shift(@$tokens); - } - return undef; -} - - -sub parse_html_tree($$); - -sub parse_html_tree($$) -{ - my ($tokens, $tree) = @_; - - while (my $token = pop_token($tokens)) { - if ($token =~ /^<[!\/]?[a-zA-Z]+/) { - $token = lc($token); - if ($token =~ /^<\!.*>$/) { - # Ignore comments etc. - } elsif ($token =~ /^<([a-z]+[1-9]*)(.*)>$/) { - my ($name, $args) = ($1, $2); - if ($name eq "meta" || $name eq "img") { - my $tmp = {}; - parse_html_tree($tokens, $tree); - return $tree; - } else { - my $tmp = { "name" => $name, "args" => str_trim($args) }; - parse_html_tree($tokens, $tmp); - push(@{$$tree{"nodes"}}, $tmp); - } - } elsif ($token =~ /^<\/([a-z]+[1-9]*)>$/) { - return $tree; - } else { - print STDERR "ERROR: Failed to parse '$token'\n"; - return undef; - } - } else { - $token = str_trim(decode_entities($token)); - push(@{$$tree{"nodes"}}, { "name" => "text", "args" => "", "text" => $token }) if length($token) > 0; - } - } - - return $tree; -} - - -sub parse_html($) -{ - return undef unless defined($_[0]); - my $str = $_[0]; - my $res = { "name" => "", "args" => "" }; - $str =~ tr/\r/ /; - $str =~ tr/\n/ /; - my @tokens = grep { !($_ =~ /^\s*$/) } split(/(<\/?[a-zA-Z]+.*?>)/, $str); - if (defined(parse_html_tree(\@tokens, $res))) - { - return $res; - } - else - { - return undef; - } -} - - -sub html_find_node($$$); - -sub html_find_node($$$) -{ - my ($node, $name, $args) = @_; - - if (defined($node)) { - if (ref($node) eq "ARRAY") { - foreach my $n (@$node) { - my $tmp = html_find_node($n, $name, $args); - # Must do it like this, in order not to break the loop - return $tmp if defined($tmp); - } - } elsif (ref($node) eq "HASH") { - if (defined($$node{"name"})) { - if ($$node{"name"} eq $name) { - if ($args ne "") { - if (defined($$node{"args"}) && $$node{"args"} =~ /$args/) { - } else { - return html_find_node($$node{"nodes"}, $name, $args); - } - } - return $node; - } else { - return html_find_node($$node{"nodes"}, $name, $args); - } - } - } - } - return undef; -} - - -### -### Helper functions for locating/traversing nodes -### in the parsed data tree structure. -### - -sub fnodep(@) -{ - my $res = html_find_node(shift, shift, ""); - if (defined(my $tmp = shift)) - { - return defined($res) ? $res->{$tmp} : undef; - } - else - { - return $res; - } -} - - -sub fnodea($$$) -{ - return html_find_node($_[0], $_[1], $_[2]); -} - - -sub get_node($$$) -{ - return defined($_[0]->[$_[2]]{"nodes"}[0]{$_[1]}) ? $_[0]->[$_[2]]{"nodes"}[0]{$_[1]} : ""; -} - -sub get_node_lc($$$) -{ - return lc(get_node($_[0], $_[1], $_[2])); -} - my %th_rain_states = ( @@ -276,6 +113,7 @@ return $tmp; } + my %th_cloud_states = ( 0 => "selkeää", @@ -417,7 +255,7 @@ my $weatherdata = {}; die( -"Weather Fetch v0.3 by ccr/TNSP \n". +"Weather Fetch v0.4 by ccr/TNSP \n". "Usage: $0 \n" ) unless scalar(@ARGV) >= 1; @@ -426,88 +264,6 @@ ### -### Fetch tiehallinto road weather measurement data -### -if (opt_chk_bool("opt_tiehallinto")) -{ - for (my $i = 1; $i <= 22; $i++) - { - my $uri = "http://alk.tiehallinto.fi/alk/tiesaa/tiesaa_maak_".$i.".html"; - print STDERR "Fetching ".$uri." ...\n" if (opt_get_int("debug") > 1); - my $res = fetch_http($uri); - if ($res->code >= 200 && $res->code <= 201) - { - my $data = $res->decoded_content; - - # Filter out crap tags we don't want or need - $data =~ s/\n/§/g; - $data =~ s///ig; - $data =~ s/]*>.*?<\/map>//ig; - $data =~ s/]*>.*?<\/form>//ig; - $data =~ s/]*>.*?<\/script>//ig; - $data =~ s/]*>//ig; - $data =~ s/]*>//ig; - $data =~ s/<\/font>//ig; - $data =~ s/]*>//ig; - $data =~ s/<\/span>//ig; - $data =~ s/<\/?b>//ig; - - $data =~ s/
//ig; - $data =~ s/ / /ig; - $data =~ s/§/\n/g; - - # Parse the HTML mess - my $otree = parse_html($data); - if (!defined($otree)) - { - print STDERR "ERROR: Failed to parse file '".$uri."'.\n"; - next; - } - - print STDERR "Parsed : ".$uri." as:\n".Dumper($otree)."\n--\n" if (opt_get_int("debug") > 2); - - # Find our desired element nodes - my $odata = fnodea(fnodep($otree, "body"), "div", "class=elementc"); - my $oupdate = fnodep($odata, "p"); - my $time_base = str2time("00:00"); - if ($oupdate) { - my $tmp = $oupdate->{"nodes"}[0]{"text"}; - if ($tmp =~ /:\s+(\d\d)\.(\d\d)\.(\d\d\d\d)\s+(\d\d:\d\d)/) { - $time_base = str2time($3."-".$2."-".$1); - } - } - - my $oelems = fnodep($odata, "table", "nodes"); - if (defined($oelems)) - { - for (my $n = 1; $n < scalar(@$oelems); $n++) - { - my $fdata = @$oelems[$n]->{"nodes"}; - $weatherdata->{get_node($fdata, "text", 0)} = - [ - # type, timestamp, temperature - 0, - parse_timestamp(get_node($fdata, "text", 1), $time_base), - get_node_lc($fdata, "text", 2), - # and the rest - get_node_lc($fdata, "text", 3), - translate_rain(get_node($fdata, "text", 4)), - get_node_lc($fdata, "text", 5), - ]; - } - } - } - else - { - print STDERR "Failed to fetch ".$uri." (\n" if (opt_get_int("debug") > 0); - } - } - - print STDERR "Tiehallinto data blob:\n".Dumper($weatherdata)."\n--\n" if (opt_get_int("debug") > 1); -} - - -### ### Fetch FMI data ### if (opt_chk_bool("opt_fmi"))