view @ 410:5c97006c89e6

fetch_weather: Cleanups.
author Matti Hamalainen <>
date Sat, 07 Jan 2017 17:56:11 +0200
parents b05da2077e44
children fe47617e7251
line wrap: on
line source

#!/usr/bin/perl -w
# Fetch Weather v0.8 by Matti 'ccr' Hamalainen <>
# (C) Copyright 2014-2017 Tecnic Software productions (TNSP)
# This script is freely distributable under GNU GPL (version 2) license.
# Should be run as a cronjob, and configured properly.
# */10 * * * *     perl -w /absolute/path/to/ /path/to/configfile
# Configuration file example is in fetch_weather.config
# Requires various Perl modules, in Debian the packages should be:
# libwww-perl libxml-simple-perl libtimedate-perl
use 5.018;
use strict;
use warnings;
use utf8;
use LWP::UserAgent;
use HTTP::Message;
use HTML::Entities;
use Compress::Zlib;
use XML::Simple;
use Date::Format;
use Date::Parse;
use Data::Dumper;
use File::Slurper qw(read_text write_text);
use Text::CSV;

### Configuration settings
# Built-in defaults. opt_read_config() only accepts keys that are present
# here and overwrites these values with the ones from the configuration file.
my %settings = (
  "debug" => 0,
  "opt_fmi" => 0,
  "opt_tiehallinto" => 0,
  "purge_threshold" => 60,
  "fmi_api_key" => "",
  "outfile" => "",
  "tiehallinto_static_meta" => "tiehallinto.meta",
  "http_user_agent" => "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 6.0) Opera 10.63  [en]",
);

### Helper functions
# Write the given message to STDERR exactly as passed (no newline is added).
sub mlog($)
{
  print STDERR $_[0];
}

# Perform an HTTP GET request for the URL given as the only argument and
# return the response object produced by LWP::UserAgent. The caller is
# responsible for checking the status code of the response.
sub fetch_http($)
{
  my $agent = LWP::UserAgent->new;

  # Apply the configured User-Agent string, if one is set
  my $ua_name = opt_get("http_user_agent");
  $agent->agent($ua_name) if (defined($ua_name) && $ua_name ne "");

  my $req = HTTP::Request->new(GET => $_[0]);
  # Advertise every content encoding that HTTP::Message is able to decode,
  # so that decoded_content() on the response can undo it
  $req->header('Accept-Encoding' => scalar HTTP::Message::decodable());

  return $agent->request($req);
}

# Return a copy of the argument with leading and trailing whitespace
# removed. The caller's variable is not modified.
sub str_trim($)
{
  my $tmp = $_[0];
  $tmp =~ s/^\s+//;
  $tmp =~ s/\s+$//;
  return $tmp;
}

# Convert a "hours:minutes" string into seconds and add it to a base value.
#   $str  - string of the form "<digits>:<digits>"
#   $offs - base value (in seconds) that the result is added to
# Returns $offs unchanged when $str is not of that form.
sub parse_timestamp($$)
{
  my ($str, $offs) = @_;
  if ($str =~ /^(\d+):(\d+)$/)
  {
    return $offs + (60 * 60 * $1) + ($2 * 60);
  }
  else
  {
    return $offs;
  }
}

# Format the given epoch time in the UTC time zone using the layout
# YYYY-MM-DDTHH:MM:SSZ, for example 2012-02-27T00:00:00Z.
sub format_time_gmt($)
{
  return time2str("%Y-%m-%dT%TZ", $_[0], "UTC");
}

# Rain state keywords mapped to the phrases that replace them.
my %th_rain_states =
(
  "Pouta" => "poutaa",
  "Heikko" => "heikkoa sadetta",
  "Kohtalainen" => "kohtalaista sadetta",
  "Voimakas" => "voimakasta sadetta",
);

# Regex alternation of all keywords, longest first, each one regex-escaped.
my $th_rain_states_k = join("|", map { quotemeta } sort { length($b) <=> length($a) } keys %th_rain_states);

# Return a copy of the argument in which every rain state keyword has been
# replaced by its phrase from %th_rain_states. Keywords are matched
# case-insensitively; all other text is left as it is.
sub translate_rain($)
{
  my $tmp = $_[0];
  # The match ignores case, so bring the matched text into the capitalised
  # form of the hash keys before the lookup. Looking up $1 directly would
  # miss e.g. "pouta" and substitute an undefined value.
  $tmp =~ s/($th_rain_states_k)/$th_rain_states{ucfirst(lc($1))}/ige;
  return $tmp;
}

# Descriptions for cloud cover values 0 .. 8.
my %th_cloud_states =
(
  0 => "selkeää",
  1 => "melkein selkeää",
  2 => "verrattain selkeää",
  3 => "verrattain selkeää",
  4 => "puolipilvistä",
  5 => "verrattain pilvistä",
  6 => "verrattain pilvistä",
  7 => "melkein pilvistä",
  8 => "pilvistä",
);

# Translate a cloud cover value into "<description> (<n>/8)".
# Returns an empty string for undefined, empty or "NaN" input. The value is
# truncated to an integer first; if that integer has no entry in
# %th_cloud_states, the integer itself is returned.
sub translate_clouds($)
{
  return "" if (!defined($_[0]) || $_[0] eq "NaN" || $_[0] eq "");
  my $tmp = int($_[0]);
  foreach my $n (sort { $a <=> $b } keys %th_cloud_states)
  {
    return $th_cloud_states{$n}." (".$n."/8)" if ($tmp == $n);
  }
  return $tmp;
}

### Return the argument itself if it is defined, otherwise an empty string
sub plonk_data($)
{
  return $_[0] // "";
}

### Return the argument lowercased if it is defined, otherwise an empty string
sub plonk_data_lc($)
{
  return defined($_[0]) ? lc($_[0]) : "";
}

### Configuration handling
# Return 1 if the setting named by the argument is defined and enabled,
# i.e. it is the string "true" or "on", or a number equal to 1.
# Returns 0 for undefined settings and for every other value.
sub opt_chk_bool($)
{
  my $val = $settings{$_[0]};
  return 0 unless defined($val);
  return 1 if ($val eq "true" || $val eq "on");
  # Compare numerically only when the value is a plain number, so that
  # strings such as "true" or "off" do not raise a non-numeric warning
  return ($val =~ /^\s*\d+(?:\.\d+)?\s*$/ && $val == 1) ? 1 : 0;
}

# Check that a setting is defined and long enough.
#   $_[0] - name of the setting
#   $_[1] - minimum length of its value in characters
# Returns true if the setting is defined and its length is at least $_[1],
# and 0 if the setting is not defined.
sub opt_chk_valid($$)
{
  if (defined($settings{$_[0]}))
  {
    my $val = $settings{$_[0]};
    return length($val) >= $_[1];
  }
  else
  {
    return 0;
  }
}

# Return the value of the named setting truncated to an integer,
# or -1 if the setting is not defined.
sub opt_get_int($)
{
  if (defined($settings{$_[0]}))
  {
    return int($settings{$_[0]});
  }
  else
  {
    return -1;
  }
}

# Return the value of the named setting, or undef if it is not defined.
# An explicit undef (not an empty list) is returned so that the result
# always occupies one slot, also when used inside an argument list.
sub opt_get($)
{
  if (defined($settings{$_[0]}))
  {
    return $settings{$_[0]};
  }
  else
  {
    return undef;
  }
}

# Read the configuration file named by the argument into %settings.
#
# Each non-empty, non-comment line must be either
#   name = <digits>      or      name = "<text>"
# where the name may be quoted, "=>" may be used instead of "=" and a
# trailing comma is allowed. Names are lowercased and must already exist
# in %settings; unknown names and malformed lines are logged via mlog().
#
# Dies if the file can not be opened. Returns 0 on success, or 1 if any
# line was rejected.
sub opt_read_config($)
{
  my $filename = $_[0];
  my $errors = 0;
  my $line = 0;

  open(my $fh, "<", $filename) or die("Could not open configuration '".$filename."'!\n");
  while (my $row = <$fh>)
  {
    # Keep the line number current for the diagnostics below
    $line++;
    chomp($row);

    if ($row =~ /(^\s*#|^\s*$)/)
    {
      # Ignore comments and empty lines
    }
    elsif ($row =~ /^\s*\"?([a-zA-Z0-9_]+)\"?\s*=>?\s*(\d+),?\s*$/)
    {
      # Numeric value
      my $key = lc($1);
      my $value = $2;
      if (defined($settings{$key}))
      {
        $settings{$key} = $value;
      }
      else
      {
        mlog("[$filename:$line] Unknown setting '$key' = $value\n");
        $errors = 1;
      }
    }
    elsif ($row =~ /^\s*\"?([a-zA-Z0-9_]+)\"?\s*=>?\s*\"(.*?)\",?\s*$/)
    {
      # Double-quoted string value
      my $key = lc($1);
      my $value = $2;
      if (defined($settings{$key}))
      {
        $settings{$key} = $value;
      }
      else
      {
        mlog("[$filename:$line] Unknown setting '$key' = '$value'\n");
        $errors = 1;
      }
    }
    else
    {
      mlog("[$filename:$line] Syntax error: $row\n");
      $errors = 1;
    }
  }
  close($fh);

  return $errors;
}

### Main program begins
# Weather records keyed by station name; each value is an array reference
# holding the fields of that record.
my $weatherdata = {};

# The configuration file argument is mandatory
die(
"Weather Fetch v0.8 by ccr/TNSP <ccr\>\n".
"Usage: $0 <config file> [force]\n"
) unless scalar(@ARGV) >= 1;

my $cfgfile = shift(@ARGV);
opt_read_config($cfgfile) == 0 or die("Errors while parsing configuration file '".$cfgfile."'.\n");

# An optional second argument "force" requests a refresh of cached meta data
my $force_update = scalar(@ARGV) >= 1 && (shift(@ARGV) eq "force");

### Load already cached data
# The output file of the previous run doubles as the cache. Each line has
# the form key|field|field|... and is loaded back into $weatherdata.
if (opt_chk_valid("outfile", 1))
{
  my $cache_file = opt_get("outfile");

  # read_text() throws if the file can not be read, so only attempt it when
  # the file exists (it does not on the very first run)
  my $str = (-e $cache_file) ? read_text($cache_file) : undef;
  if (defined($str))
  {
    foreach my $line (split(/\s*\n\s*/, $str))
    {
      # Limit -1 keeps trailing empty fields
      my @mtmp = split(/\|/, $line, -1);

      # Require the key plus at least two data fields. The element count
      # has to be taken from the array itself, not from a reference to it.
      if (scalar(@mtmp) >= 3)
      {
        my $key = shift(@mtmp);
        $weatherdata->{$key} = \@mtmp;
      }
    }
    print STDERR scalar(keys %$weatherdata)." old records reloaded.\n" if (opt_get_int("debug") > 0);
  }
}

### Fetch Tiehallinto data
# Runs only when the opt_tiehallinto setting is enabled.
# NOTE(review): the block delimiters (braces) and "else" keywords are not
# present in this copy of the script. The indentation suggests that the
# error report and the parsing code below are alternative branches of the
# SOAP check - confirm against the original script.
if (opt_chk_bool("opt_tiehallinto"))
  # NOTE(review): the request URI is an empty string in this copy, so the
  # service address has to be supplied before this fetch can succeed.
  my $uri = "";
  print STDERR "Fetching Tiehallinto road weather data from ".$uri."\n" if (opt_get_int("debug") > 0);
  my $res = fetch_http($uri);
  # Only HTTP status codes 200 and 201 are accepted
  if ($res->code >= 200 && $res->code <= 201)
    # Parse the decoded response body with XML::Simple
    my $xml = XMLin($res->decoded_content);

    # The payload is expected under soap:Body / RoadWeatherResponse
    if (!defined($xml->{"soap:Body"}) || !defined($xml->{"soap:Body"}{"RoadWeatherResponse"}))
      print STDERR "ERROR: SOAP call result did not contain required data.\n";
      print STDERR $res->decoded_content."\n\n";
      # Parse the XML
      my $data = $xml->{"soap:Body"}{"RoadWeatherResponse"};

      # Check if we need to update the static meta data
      my $meta_file = opt_get("tiehallinto_static_meta");
      # A missing cache file always triggers a fetch
      my $fetch_meta = (-e $meta_file) ? 0 : 1;

      if (defined($data->{"laststaticdataupdate"}))
        # Compare metadata cache file modification timestamp to info in XML
        my $tmp1 = str2time($data->{"laststaticdataupdate"});
        # Element 9 of stat() is the last modification time of the file
        my $tmp2 = (-e $meta_file) ? (stat($meta_file))[9] : -1;
        # Refetch unless the cache file is strictly newer than the update
        # time given in the XML
        $fetch_meta = 1 unless ($tmp1 < $tmp2);

      # Fetch or read the cache
      # The static meta data is downloaded again when the cache was found
      # to be missing or outdated, or when "force" was given on the command
      # line. Otherwise the cached copy is read from $meta_file.
      my $meta_str;
      if ($fetch_meta || $force_update)
      {
        my $uri = "";
        print STDERR "Fetching Tiehallinto static meta data from $uri\n" if (opt_get_int("debug") > 1);
        my $res = fetch_http($uri);
        # Accept the same status codes (200 and 201) as the other fetches
        die("Failed to fetch $uri data.\n") unless ($res->code >= 200 && $res->code <= 201);

        print STDERR "Storing to cache '$meta_file'.\n" if (opt_get_int("debug") > 0);
        $meta_str = $res->decoded_content;
        # Use the same encoding name for writing and for reading the cache
        write_text($meta_file, $meta_str, "utf-8");
      }
      else
      {
        print STDERR "Using CACHED Tiehallinto static meta data from '$meta_file'.\n" if (opt_get_int("debug") > 0);
        $meta_str = read_text($meta_file, "utf-8");
      }

      # Parse the data ..
      # The meta data is semicolon-separated text, one record per line. Each
      # record with more than one field is stored in $meta_data under the
      # value of its first field.
      my $meta_data = {};
#      my $csv = Text::CSV->new({blank_is_undef => 1, decode_utf8 => 1, sep_char => ";"});
      my $csv = Text::CSV->new({blank_is_undef => 1, sep_char => ";"});
      die("Failed to instantiate Text::CSV object?\n") unless defined($csv);

      foreach my $line (split(/\s*\n\s*/, $meta_str))
        # Lines that Text::CSV can not parse are skipped
        if (defined($line) && $csv->parse($line))
          my @fields = $csv->fields();
          if (scalar(@fields) > 1)
            $$meta_data{$fields[0]} = \@fields;

      # Parse XML and combine with the station meta data
      # A station is used only if its stationid is found in $meta_data and
      # the third meta data field (index 2) is non-empty. That field is the
      # key of the record in $weatherdata.
      if (defined($data->{"roadweatherdata"}))
        my $nrecords = 0;
        foreach my $wdata (@{$data->{"roadweatherdata"}{"roadweather"}})
          my $wid = $wdata->{"stationid"};
          if (defined($meta_data->{$wid}) && defined($meta_data->{$wid}[2]) && $meta_data->{$wid}[2] ne "")
            # NOTE(review): the right-hand side of this assignment (the
            # record contents) is missing in this copy of the script, and
            # $nrecords is never incremented in the visible code.
            $weatherdata->{$meta_data->{$wid}[2]} =

            # NOTE(review): presumably the "else" branch of the station
            # check above - confirm against the original script.
            print STDERR "Station ID #".$wid." not defined?\n" if (opt_get_int("debug") > 0);
        print STDERR $nrecords." records from Tiehallinto.\n" if (opt_get_int("debug") > 0);
        # NOTE(review): presumably reached only when "roadweatherdata" is
        # not defined in the response - confirm.
        print STDERR "ERROR: Invalid (or unsupported) road weather data blob.\n";
        print STDERR $res->decoded_content."\n\n";

### Fetch FMI data
# Runs only when the opt_fmi setting is enabled.
# NOTE(review): the block delimiters (braces) and "else" keywords are not
# present in this copy of the script; the branch structure described in the
# notes below is inferred from the indentation and should be confirmed.
if (opt_chk_bool("opt_fmi"))
  # The API key must be at least 10 characters long
  die("FMI data scrape enabled, but no API key set.\n") unless opt_chk_valid("fmi_api_key", 10);
  # Requested parameters. Values on each data line are assigned to these
  # names by position, so the order matters.
  my @fmitems = ("temperature", "humidity", "windspeedms", "totalcloudcover");

  # Query the last 10 minutes up to the current time.
  # NOTE(review): the base part of the URI is an empty string in this copy.
  my $uri = "".opt_get("fmi_api_key").
#    "timevaluepair".
    "&starttime=".format_time_gmt(time() - 10*60)."&endtime=".format_time_gmt(time()).
    "&parameters=".join(",", @fmitems)."&maxlocations=100&bbox=19,59,32,75";

  print STDERR "FMI URI: ".$uri."\n" if (opt_get_int("debug") > 0);

  my $res = fetch_http($uri);
  # Only HTTP status codes 200 and 201 are accepted
  if ($res->code >= 200 && $res->code <= 201)
    my $xml = XMLin($res->decoded_content);
    # Not used anywhere in the visible code
    my $time_base = time();

    if (defined($xml->{"wfs:member"}{"omso:GridSeriesObservation"}))
      my $fdata = $xml->{"wfs:member"}{"omso:GridSeriesObservation"};
      my $fshit = $fdata->{"om:result"}{"gmlcov:MultiPointCoverage"};

      # Positions and values arrive as two multi-line text blocks that are
      # paired up line by line
      my @position_lines = split(/\n/, $fshit->{"gml:domainSet"}{"gmlcov:SimpleMultiPoint"}{"gmlcov:positions"});
      my @data_lines = split(/\n/, $fshit->{"gml:rangeSet"}{"gml:DataBlock"}{"gml:doubleOrNilReasonTupleList"});
      # Collected records: one hash per line with the parameter values plus
      # "lat", "long" and "time"
      my @farray = ();

      if (scalar(@position_lines) == scalar(@data_lines))
        for (my $nline = 0; $nline < scalar(@position_lines); $nline++)
          my $dline = str_trim($data_lines[$nline]);
          my $pline = str_trim($position_lines[$nline]);

          # Extract every decimal number or NaN from the data line
          my @fmatches = ($dline =~ /\s*([\+\-]?\d+\.\d*|NaN)\s*/ig);
          if (scalar(@fmatches) != scalar(@fmitems))
            print STDERR "Not enough items in scalar line #".$nline." (".
              scalar(@fmatches). " vs ".scalar(@fmitems)."): ".$dline."\n";
            my $vtmp = {};
            # NaN values are left out of the record
            for (my $fni = 0; $fni < scalar(@fmitems); $fni++)
              $$vtmp{$fmitems[$fni]} = $fmatches[$fni] if (lc($fmatches[$fni]) ne "nan");
            # Position line: two decimal numbers followed by an integer
            if ($pline =~ /^\s*([\+\-]?\d+\.\d*)\s+([\+\-]?\d+\.\d*)\s+(\d+)\s*$/)
              $$vtmp{"lat"} = $1;
              $$vtmp{"long"} = $2;
              $$vtmp{"time"} = $3;
              push(@farray, $vtmp);
              print STDERR "Data mismatch #".$nline.": ".$pline."\n";
        print STDERR "Position and data line counts do not match.\n";
        # Jumps to the skip_it label, which has to exist at file level
        # after this block
        goto skip_it;
      # XXX Hashify the array into lat/long keys

      # This is horrible :S
      # For every named point of the feature of interest, look for the
      # collected record with the same coordinates
      my $nrecords = 0;
      foreach my $xnode (@{$fdata->{"om:featureOfInterest"}{"sams:SF_SpatialSamplingFeature"}{"sams:shape"}{"gml:MultiPoint"}{"gml:pointMember"}})
        my $floc = $xnode->{"gml:Point"};
        if ($floc->{"gml:pos"} =~ /^\s*([\+\-]?\d+\.\d*)\s+([\+\-]?\d+\.\d*)\s*$/)
          my ($flat, $flong) = ($1, $2);

          # Should use a hash -
          foreach my $frec (@farray)
            if ($frec->{"lat"} == $flat && $frec->{"long"} == $flong &&
                $floc->{"gml:name"} ne "")
              # NOTE(review): the right-hand side of this assignment (the
              # record contents) is missing in this copy of the script, and
              # $nrecords is never incremented in the visible code.
              $weatherdata->{$floc->{"gml:name"}} =


      print STDERR $nrecords." records from FMI.\n" if (opt_get_int("debug") > 0);
      # defined
      print STDERR "Invalid XML received:\n";
      print STDERR $res->decoded_content."\n\n";
    # NOTE(review): presumably reached only when the HTTP status check
    # above fails - confirm.
    print STDERR "Error fetching FMI XML: ".$res->status_line."\n";

### Skip here if the FMI shit fails due to broken data
# Target of the "goto skip_it" that is taken when the FMI position and data
# line counts do not match.
skip_it:

### Purge too old entries
# Records whose timestamp (element 1 of the record, compared against the
# current epoch time) is more than purge_threshold minutes old are removed.
# A threshold of 0 or less disables purging.
if (opt_chk_valid("purge_threshold", 1))
{
  my $purge = opt_get_int("purge_threshold");
  if ($purge > 0)
  {
    my $wqtime = time();
    my $nold = scalar(keys %$weatherdata);

    # keys() returns a copy of the key list, so deleting inside the loop
    # is safe
    foreach my $key (keys %$weatherdata)
    {
      if ($wqtime - $weatherdata->{$key}[1] > (60 * $purge))
      {
        delete $$weatherdata{$key};
      }
    }

    my $nnew = scalar(keys %$weatherdata);
    print STDERR "Purged data older than ".$purge." minutes, ".$nold." -> ".$nnew." = ".($nold - $nnew)." removed.\n" if (opt_get_int("debug") > 0);
  }
}

### Output
# Records are written one per line as key|field|field|..., sorted by key.
# When an output file is configured, STDOUT is reopened onto that file;
# otherwise the data goes to the original standard output.
if (opt_chk_valid("outfile", 1))
{
  print STDERR "Dumping data to output file '".opt_get("outfile")."'\n" if (opt_get_int("debug") > 0);
  open(STDOUT, '>', opt_get("outfile")) or die("Could not open output file '".opt_get("outfile")."': $!\n");
}

binmode STDOUT, ':encoding(utf-8)';

foreach my $key (sort { $a cmp $b } keys %$weatherdata)
{
  print STDOUT $key."|".join("|", @{$weatherdata->{$key}})."\n";
}

# Buffered write errors only surface when the handle is closed
close(STDOUT) or die("Could not finish writing output: $!\n");
