Mercurial > hg > egg-tcls
view fetch_weather.pl @ 368:ed1c7f2d81df
fetch_weather: Close STDOUT filehandle before reopening it, to make sure we
are actually writing data to the intended output.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Fri, 18 Dec 2015 10:09:32 +0200 |
parents | 1ca8ae195077 |
children | 86adad909681 |
line wrap: on
line source
#!/usr/bin/perl -w ########################################################################## # # Fetch Weather v0.6 by Matti 'ccr' Hamalainen <ccr@tnsp.org> # (C) Copyright 2014-2015 Tecnic Software productions (TNSP) # This script is freely distributable under GNU GPL (version 2) license. # # Should be ran as a cronjob, and configured properly. # */10 * * * * perl -w /absolute/path/to/fetch_weather.pl /path/to/configfile # # Configuration file example is in fetch_weather.config # # Requires various Perl modules, in Debian the packages should be: # libwww-perl libxml-simple-perl libtimedate-perl # # ########################################################################## use strict; use utf8; use LWP::UserAgent; use HTML::Entities; use XML::Simple; use Date::Format; use Date::Parse; use Data::Dumper; use File::Slurp; use Text::CSV; ### ### Configuration settings ### my %settings = ( "debug" => 0, "opt_fmi" => 0, "opt_tiehallinto" => 0, "fmi_api_key" => "", "outfile" => "", "tiehallinto_static_meta" => "tiehallinto.meta", "http_user_agent" => "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 6.0) Opera 10.63 [en]", ); ### ### Helper functions ### sub mlog($) { print STDERR $_[0]; } sub fetch_http($) { my $agent = LWP::UserAgent->new; $agent->agent($settings{"http_user_agent"}); $agent->timeout(20); my $req = HTTP::Request->new(GET => $_[0]); return $agent->request($req); } sub parse_timestamp($$) { my ($str, $offs) = @_; if ($str =~ /^(\d+):(\d+)$/) { return $offs + (60 * 60 * $1) + ($2 * 60); } else { return $offs; } } sub format_time_gmt($) { # 2012-02-27T00:00:00Z return time2str("%Y-%m-%dT%TZ", $_[0], "UTC"); } sub str_trim($) { my $str = $_[0]; if (defined($str)) { $str =~ s/^\s*//; $str =~ s/\s*$//; } return $str; } ### ### Helper functions ### my %th_rain_states = ( "Pouta" => "poutaa", "Heikko" => "heikkoa sadetta", "Kohtalainen" => "kohtalaista sadetta", "Voimakas" => "voimakasta sadetta", ); my $th_rain_states_k = join("|", map {quotemeta} sort { length($b)<=>length($a) } keys %th_rain_states); sub translate_rain($) { my $tmp = $_[0]; $tmp =~ s/($th_rain_states_k)/$th_rain_states{$1}/igo; return $tmp; } my %th_cloud_states = ( 0 => "selkeää", 1 => "melkein selkeää", 2 => "verrattain selkeää", 3 => "verrattain selkeää", 4 => "puolipilvistä", 5 => "verrattain pilvistä", 6 => "verrattain pilvistä", 7 => "melkein pilvistä", 8 => "pilvistä", ); sub translate_clouds($) { return "" if ($_[0] eq "NaN" || $_[0] eq ""); my $tmp = int($_[0]); foreach my $n (sort { $a <=> $b } keys %th_cloud_states) { return $th_cloud_states{$n}." (".$n."/8)" if ($tmp == $n); } return $tmp; } sub plonk_data($) { return defined($_[0]) ? $_[0] : ""; } sub plonk_data_lc($) { return defined($_[0]) ? lc($_[0]) : ""; } ### ### Configuration handling ### sub opt_chk_bool($) { if (defined($settings{$_[0]})) { my $val = $settings{$_[0]}; return ($val == 1 || $val eq "true" || $val eq "on" || $val eq "1"); } else { return 0; } } sub opt_chk_valid($$) { if (defined($settings{$_[0]})) { my $val = $settings{$_[0]}; return length($val) >= $_[1]; } else { return 0; } } sub opt_get_int($) { if (defined($settings{$_[0]})) { return int($settings{$_[0]}); } else { return -1; } } sub opt_get($) { if (defined($settings{$_[0]})) { return $settings{$_[0]}; } else { return undef; } } sub opt_read_config($) { my $filename = $_[0]; my $errors = 0; my $line = 0; open(CONFFILE, "<", $filename) or die("Could not open configuration '".$filename."'!\n"); while (<CONFFILE>) { $line++; chomp; if (/(^\s*#|^\s*$)/) { # Ignore comments and empty lines } elsif (/^\s*\"?([a-zA-Z0-9_]+)\"?\s*=>?\s*(\d+),?\s*$/) { my $key = lc($1); my $value = $2; if (defined($settings{$key})) { $settings{$key} = $value; } else { mlog("[$filename:$line] Unknown setting '$key' = $value\n"); $errors = 1; } } elsif (/^\s*\"?([a-zA-Z0-9_]+)\"?\s*=>?\s*\"(.*?)\",?\s*$/) { my $key = lc($1); my $value = $2; if (defined($settings{$key})) { $settings{$key} = $value; } else { mlog("[$filename:$line] Unknown setting '$key' = '$value'\n"); $errors = 1; } } else { mlog("[$filename:$line] Syntax error: $_\n"); $errors = 1; } } close(CONFFILE); return $errors; } ### ### Main program begins ### my $weatherdata = {}; die( "Weather Fetch v0.6 by ccr/TNSP <ccr\@tnsp.org>\n". "Usage: $0 <config file>\n" ) unless scalar(@ARGV) >= 1; my $cfgfile = shift; opt_read_config($cfgfile) == 0 or die("Errors while parsing configuration file '".$cfgfile."'.\n"); ### ### Load already cached data ### if (opt_chk_valid("outfile", 1)) { my $str = read_file(opt_get("outfile"), binmode => ':utf8', err_mode => 'quiet'); if (defined($str)) { foreach my $line (split(/\s*\n\s*/, $str)) { my @mtmp = split(/\|/, $line, -1); if (scalar(\@mtmp) >= 3) { $weatherdata->{shift @mtmp} = \@mtmp; } } print STDERR scalar(keys %$weatherdata)." old records reloaded.\n" if (opt_get_int("debug") > 0); } } ### ### Fetch Tiehallinto data ### if (opt_chk_bool("opt_tiehallinto")) { my $uri = "http://tie.digitraffic.fi/sujuvuus/ws/roadWeather"; my $res = fetch_http($uri); if ($res->code >= 200 && $res->code <= 201) { my $xml = XMLin($res->decoded_content); if (!defined($xml->{"soap:Body"}) || !defined($xml->{"soap:Body"}{"RoadWeatherResponse"})) { print STDERR "ERROR: SOAP call result did not contain required data.\n"; print STDERR $res->decoded_content."\n\n"; } else { my $data = $xml->{"soap:Body"}{"RoadWeatherResponse"}; print STDERR "Checking Tiehallinto response from ".$uri."\n" if (opt_get_int("debug") > 0); # Check if we need to update the static meta data my $meta_file = opt_get("tiehallinto_static_meta"); my $fetch_meta = (-e $meta_file) ? 0 : 1; if (defined($data->{"laststaticdataupdate"})) { # Compare metadata cache file modification timestamp to info in XML my $tmp1 = str2time($data->{"laststaticdataupdate"}); my $tmp2 = (-e $meta_file) ? (stat($meta_file))[9] : -1; $fetch_meta = 1 unless ($tmp1 < $tmp2); } # Fetch or read the cache my $meta_str; if ($fetch_meta) { print STDERR "Fetching Tiehallinto static meta data.\n" if (opt_get_int("debug") > 0); my $uri = "https://raw.githubusercontent.com/finnishtransportagency/metadata/master/csv/meta_traffic_stations.csv"; my $res = fetch_http($uri); die("Failed to fetch $uri data.\n") unless ($res->code <= 200 && $res->code <= 201); print STDERR "Storing to cache '$meta_file'.\n" if (opt_get_int("debug") > 0); $meta_str = $res->decoded_content; write_file($meta_file, {binmode => ':utf8'}, $meta_str); } else { print STDERR "Using CACHED Tiehallinto static meta data from '$meta_file'.\n" if (opt_get_int("debug") > 0); $meta_str = read_file($meta_file, binmode => ':utf8'); } # Parse the data .. my $meta_data = {}; # my $csv = Text::CSV->new({blank_is_undef => 1, decode_utf8 => 1}); my $csv = Text::CSV->new({blank_is_undef => 1}); die("Failed to instantiate Text::CSV object?\n") unless defined($csv); foreach my $line (split(/\s*\n\s*/, $meta_str)) { if (defined($line) && $csv->parse($line)) { my @fields = $csv->fields(); if (scalar(@fields) > 1) { $$meta_data{$fields[1]} = \@fields; } } } # Parse XML and combine with the station meta data if (defined($data->{"roadweatherdata"})) { my $nrecords = 0; foreach my $wdata (@{$data->{"roadweatherdata"}{"roadweather"}}) { my $wid = $wdata->{"stationid"}; if (defined($meta_data->{$wid})) { $nrecords++; $weatherdata->{$meta_data->{$wid}[3]} = [ 1, str2time(plonk_data($wdata->{"measurementtime"}{"utc"})), plonk_data($wdata->{"airtemperature1"}), plonk_data($wdata->{"humidity"}), plonk_data($wdata->{"averagewindspeed"}), ]; } } print STDERR $nrecords." records from Tiehallinto.\n" if (opt_get_int("debug") > 0); } else { print STDERR "ERROR: Invalid (or unsupported) road weather data blob.\n"; print STDERR $res->decoded_content."\n\n"; } } } } ### ### Fetch FMI data ### if (opt_chk_bool("opt_fmi")) { die("FMI data scrape enabled, but no API key set.\n") unless opt_chk_valid("fmi_api_key", 10); my @fmitems = ("temperature", "humidity", "windspeedms", "totalcloudcover"); my $uri = "http://data.fmi.fi/fmi-apikey/".opt_get("fmi_api_key"). "/wfs?request=getFeature&storedquery_id=fmi::observations::weather::". "multipointcoverage". # "timevaluepair". "&starttime=".format_time_gmt(time() - 10*60)."&endtime=".format_time_gmt(time()). "¶meters=".join(",", @fmitems)."&maxlocations=100&bbox=19,59,32,75"; print STDERR "FMI URI: ".$uri."\n" if (opt_get_int("debug") > 0); my $res = fetch_http($uri); if ($res->code >= 200 && $res->code <= 201) { my $xml = XMLin($res->decoded_content); my $time_base = time(); if (defined($xml->{"wfs:member"}{"omso:GridSeriesObservation"})) { my $fdata = $xml->{"wfs:member"}{"omso:GridSeriesObservation"}; my $fshit = $fdata->{"om:result"}{"gmlcov:MultiPointCoverage"}; my @farray = (); foreach my $fline (split(/\n/, $fshit->{"gml:domainSet"}{"gmlcov:SimpleMultiPoint"}{"gmlcov:positions"})) { if ($fline =~ /^\s*([\+\-]?\d+\.\d*)\s+([\+\-]?\d+\.\d*)\s+(\d+)\s*$/) { push(@farray, {"lat" => $1, "long" => $2, "time" => $3}); } } my $findex = 0; foreach my $fline (split(/\n/, $fshit->{"gml:rangeSet"}{"gml:DataBlock"}{"gml:doubleOrNilReasonTupleList"})) { my @fmatches = ($fline =~ /\s*([\+\-]?\d+\.\d*|NaN)\s*/ig); if (scalar(@fmatches) > 0) { die("Not enough items in scalar line (".scalar(@fmatches). " vs ".scalar(@fmitems). "): ".$fline."\n") if (scalar(@fmatches) != scalar(@fmitems)); for (my $fni = 0; $fni < scalar(@fmitems); $fni++) { $farray[$findex]{$fmitems[$fni]} = $fmatches[$fni] if (lc($fmatches[$fni]) ne "nan"); } $findex++; } } # XXX Hashify the array into lat/long keys # This is horrible :S my $fcrap = $fdata->{"om:featureOfInterest"}{"sams:SF_SpatialSamplingFeature"}{"sams:shape"}{"gml:MultiPoint"}{"gml:pointMember"}; my $nrecords = 0; foreach my $xnode (@{$fcrap}) { my $floc = $xnode->{"gml:Point"}; if ($floc->{"gml:pos"} =~ /^\s*([\+\-]?\d+\.\d*)\s+([\+\-]?\d+\.\d*)\s*$/) { my ($flat, $flong) = ($1, $2); # Should use a hash - foreach my $flol (@farray) { if ($flol->{"lat"} == $flat && $flol->{"long"} == $flong) { $nrecords++; $weatherdata->{$floc->{"gml:name"}} = [ 1, plonk_data($flol->{"time"}), plonk_data($flol->{"temperature"}), plonk_data($flol->{"humidity"}), plonk_data($flol->{"windspeedms"}), translate_clouds(plonk_data($flol->{"totalcloudcover"})), ]; } } } } print STDERR $nrecords." records from FMI.\n" if (opt_get_int("debug") > 0); } else { # defined print STDERR "Invalid XML received:\n"; print STDERR $res->decoded_content."\n\n"; } } else { print STDERR "Error fetching FMI XML: ".$res->status_line."\n"; } } ### ### Output ### if (opt_chk_valid("outfile", 1)) { close(STDOUT); open(STDOUT, '>', opt_get("outfile")) or die("Could not open output file '".opt_get("outfile")."'.\n"); } binmode STDOUT, ':encoding(utf-8)'; foreach my $key (sort { $a cmp $b } keys %$weatherdata) { print STDOUT $key."|".join("|", @{$weatherdata->{$key}})."\n"; } close(STDOUT);