changeset 331:b8ae5e99341b

fetc_weather: Remove support for old HTML format parsing of Tiehallinto data, it's not provided in this format anymore.
author Matti Hamalainen <ccr@tnsp.org>
date Sat, 19 Sep 2015 16:09:12 +0300
parents c42c900daf08
children bff53f5bba5b
files fetch_weather.pl
diffstat 1 files changed, 2 insertions(+), 246 deletions(-) [+]
line wrap: on
line diff
--- a/fetch_weather.pl	Wed Jun 10 19:05:08 2015 +0300
+++ b/fetch_weather.pl	Sat Sep 19 16:09:12 2015 +0300
@@ -93,171 +93,8 @@
 
 
 ###
-### Loose HTML parser
+### Helper functions
 ###
-sub pop_token_a($)
-{
-  my $tokens = $_[0];
-  return shift(@$tokens);
-}
-
-
-sub pop_token($)
-{
-  return str_trim(pop_token_a($_[0]));
-}
-
-
-sub parse_html_str($)
-{
-  my $tokens = $_[0];
-  my $token = pop_token($tokens);
-  my $str = "";
-  $token =~ s/^\s*//;
-
-  return undef unless (substr($token, 0, 1) eq '"');
-  $token = substr($token, 1);
-  
-  while (defined($token)) {
-    my $tmp = $token;
-    $tmp =~ s/\s*$//;
-    if (substr($tmp, -1) eq '"') {
-      $str .= substr($tmp, 0, -1);
-      return $str;
-    } else {
-      $str .= $token;
-    }
-    $token = shift(@$tokens);
-  }
-  return undef;
-}
-
-
-sub parse_html_tree($$);
-
-sub parse_html_tree($$)
-{
-  my ($tokens, $tree) = @_;
-
-  while (my $token = pop_token($tokens)) {
-    if ($token =~ /^<[!\/]?[a-zA-Z]+/) {
-      $token = lc($token);
-      if ($token =~ /^<\!.*>$/) {
-        # Ignore comments etc.
-      } elsif ($token =~ /^<([a-z]+[1-9]*)(.*)>$/) {
-        my ($name, $args) = ($1, $2);
-        if ($name eq "meta" || $name eq "img") {
-          my $tmp = {};
-          parse_html_tree($tokens, $tree);
-          return $tree;
-        } else {
-          my $tmp = { "name" => $name, "args" => str_trim($args) };
-          parse_html_tree($tokens, $tmp);
-          push(@{$$tree{"nodes"}}, $tmp);
-        }
-      } elsif ($token =~ /^<\/([a-z]+[1-9]*)>$/) {
-        return $tree;
-      } else {
-        print STDERR "ERROR: Failed to parse '$token'\n";
-        return undef;
-      }
-    } else {
-      $token = str_trim(decode_entities($token));
-      push(@{$$tree{"nodes"}}, { "name" => "text", "args" => "", "text" => $token }) if length($token) > 0;
-    }
-  }
-  
-  return $tree;
-}
-
-
-sub parse_html($)
-{
-  return undef unless defined($_[0]);
-  my $str = $_[0];
-  my $res = { "name" => "", "args" => "" };
-  $str =~ tr/\r/ /;
-  $str =~ tr/\n/ /;
-  my @tokens = grep { !($_ =~ /^\s*$/) } split(/(<\/?[a-zA-Z]+.*?>)/, $str);
-  if (defined(parse_html_tree(\@tokens, $res)))
-  {
-    return $res;
-  }
-  else
-  {
-    return undef;
-  }
-}
-
-
-sub html_find_node($$$);
-
-sub html_find_node($$$)
-{
-  my ($node, $name, $args) = @_;
-  
-  if (defined($node)) {
-    if (ref($node) eq "ARRAY") {
-      foreach my $n (@$node) {
-        my $tmp = html_find_node($n, $name, $args);
-        # Must do it like this, in order not to break the loop
-        return $tmp if defined($tmp);
-      }
-    } elsif (ref($node) eq "HASH") {
-      if (defined($$node{"name"})) {
-        if ($$node{"name"} eq $name) {
-          if ($args ne "") {
-            if (defined($$node{"args"}) && $$node{"args"} =~ /$args/) {
-            } else {
-              return html_find_node($$node{"nodes"}, $name, $args);
-            }
-          }
-          return $node;
-        } else {
-          return html_find_node($$node{"nodes"}, $name, $args);
-        }
-      }
-    }
-  }
-  return undef;
-}
-
-
-###
-### Helper functions for locating/traversing nodes
-### in the parsed data tree structure.
-###
-
-sub fnodep(@)
-{
-  my $res = html_find_node(shift, shift, "");
-  if (defined(my $tmp = shift))
-  {
-    return defined($res) ? $res->{$tmp} : undef;
-  }
-  else
-  {
-    return $res;
-  }
-}
-
-
-sub fnodea($$$)
-{
-  return html_find_node($_[0], $_[1], $_[2]);
-}
-
-
-sub get_node($$$)
-{
-  return defined($_[0]->[$_[2]]{"nodes"}[0]{$_[1]}) ? $_[0]->[$_[2]]{"nodes"}[0]{$_[1]} : "";
-}
-
-sub get_node_lc($$$)
-{
-  return lc(get_node($_[0], $_[1], $_[2]));
-}
-
 
 my %th_rain_states =
 (
@@ -276,6 +113,7 @@
   return $tmp;
 }
 
+
 my %th_cloud_states =
 (
   0 => "selkeää",
@@ -426,88 +264,6 @@
 
 
 ###
-### Fetch tiehallinto road weather measurement data
-###
-if (opt_chk_bool("opt_tiehallinto"))
-{
-  for (my $i = 1; $i <= 22; $i++)
-  {
-    my $uri = "http://alk.tiehallinto.fi/alk/tiesaa/tiesaa_maak_".$i.".html";
-    print STDERR "Fetching ".$uri." ...\n" if (opt_get_int("debug") > 1);
-    my $res = fetch_http($uri);
-    if ($res->code >= 200 && $res->code <= 201)
-    {
-      my $data = $res->decoded_content;
-      
-      # Filter out crap tags we don't want or need
-      $data =~ s/\n/§/g;
-      $data =~ s/<!--.*?-->//ig;
-      $data =~ s/<map[^>]*>.*?<\/map>//ig;
-      $data =~ s/<form[^>]*>.*?<\/form>//ig;
-      $data =~ s/<script[^>]*>.*?<\/script>//ig;
-      $data =~ s/<meta[^>]*>//ig;
-      $data =~ s/<font[^>]*>//ig;
-      $data =~ s/<\/font>//ig;
-      $data =~ s/<span[^>]*>//ig;
-      $data =~ s/<\/span>//ig;
-      $data =~ s/<\/?b>//ig;
-
-      $data =~ s/<br>//ig;
-      $data =~ s/&nbsp;/ /ig;
-      $data =~ s/§/\n/g;
-
-      # Parse the HTML mess
-      my $otree = parse_html($data);
-      if (!defined($otree))
-      {
-        print STDERR "ERROR: Failed to parse file '".$uri."'.\n";
-        next;
-      }
-
-      print STDERR "Parsed : ".$uri." as:\n".Dumper($otree)."\n--\n" if (opt_get_int("debug") > 2);
-
-      # Find our desired element nodes
-      my $odata = fnodea(fnodep($otree, "body"), "div", "class=elementc");
-      my $oupdate = fnodep($odata, "p");
-      my $time_base = str2time("00:00");
-      if ($oupdate) {
-        my $tmp = $oupdate->{"nodes"}[0]{"text"};
-        if ($tmp =~ /:\s+(\d\d)\.(\d\d)\.(\d\d\d\d)\s+(\d\d:\d\d)/) {
-          $time_base = str2time($3."-".$2."-".$1);
-        }
-      }
-      
-      my $oelems = fnodep($odata, "table", "nodes");
-      if (defined($oelems))
-      {
-        for (my $n = 1; $n < scalar(@$oelems); $n++)
-        {
-          my $fdata = @$oelems[$n]->{"nodes"};
-          $weatherdata->{get_node($fdata, "text", 0)} =
-          [
-            # type, timestamp, temperature
-            0,
-            parse_timestamp(get_node($fdata, "text", 1), $time_base),
-            get_node_lc($fdata, "text", 2),
-            # and the rest
-            get_node_lc($fdata, "text", 3),
-            translate_rain(get_node($fdata, "text", 4)),
-            get_node_lc($fdata, "text", 5),
-          ];
-        }
-      }
-    }
-    else
-    {
-      print STDERR "Failed to fetch ".$uri." (\n" if (opt_get_int("debug") > 0);
-    }
-  }
-
-  print STDERR "Tiehallinto data blob:\n".Dumper($weatherdata)."\n--\n" if (opt_get_int("debug") > 1);
-}
-
-
-###
 ### Fetch FMI data
 ###
 if (opt_chk_bool("opt_fmi"))