changeset 162:70f432e3d1dc

Some remodeling here and there.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 20 Aug 2015 06:42:32 +0300
parents 62687fee6f1c
children 3790db4eb29b
files parsedata.pl
diffstat 1 files changed, 103 insertions(+), 98 deletions(-) [+]
line wrap: on
line diff
--- a/parsedata.pl	Thu Aug 20 06:27:40 2015 +0300
+++ b/parsedata.pl	Thu Aug 20 06:42:32 2015 +0300
@@ -7,6 +7,25 @@
 use Data::Dumper;
 use HTML::Entities;
 
+###
+### Some globals (file-scoped lexicals, declared ahead of the subs and the main program)
+###
+my $modes = "php|xml";      # Alternation of valid output modes; interpolated into the option regex
+my $opt_mode = "php";       # Selected output mode, overridden by -php / -xml
+my $opt_dump = 0;           # Set to 1 by -dump: dump the parsed HTML tree and exit
+my $opt_filename;           # Input file name; left undef (-> usage message) if none was given
+my $opt_outfile;            # Set by -o <filename>; the usage text says stdout is used when unset
+# Parse state. NOTE(review): only $hourTimes is touched in the code visible in this diff; the other roles are presumed from the names -- confirm.
+my $cid = 0;
+my $hourTimes = [];         # {"start" => ..., "end" => ...} entries pushed while walking the table
+my $hourDefs = {};
+my $hourTable = {};
+my $hourFillTable = {};
+my $maxDays = 0;
+my $firstHour = 0;
+my $lastHour = 0;
+my $totalHours = 0;
+
 
 sub urlencode($)
 {
@@ -189,100 +208,7 @@
 }
 
 
-###
-### Main program
-###
-my $modes = "php|xml";
-my $opt_mode = "php";
-my $opt_dump = 0;
-my $opt_filename;
-my $opt_outfile;
-
-while (defined(my $arg = shift)) {
-  if (substr($arg, 0, 1) eq "-") {
-    if ($arg =~ /^-($modes)$/o) {
-      $opt_mode = $1;
-    }
-    elsif ($arg eq "-dump") {
-      $opt_dump = 1;
-    }
-    elsif ($arg eq "-o") {
-      $opt_outfile = shift or die("Output filename option -o requires an argument.\n");
-    } else {
-      die("Invalid option '$arg'.\n");
-    }
-  } else {
-    $opt_filename = $arg;
-  }
-}
-
-die("Usage: $0 [options] <filename>
-
-  -php               Output a PHP include file with data in arrays (default)
-  -xml               Output a simple XML file.
-
-  -o <filename>	     Set output filename. Default is to use stdout.
-
-  -dump	             Dump HTML tree to stdout and quit.
-
-") unless defined($opt_filename);
-
-
-my $data;
-open(my $fh, '<:encoding(iso-8859-1)', $opt_filename) or die("Error opening '$opt_filename': $!\n");
-$data = do { local $/; <$fh> };
-close($fh);
-
-die("No data in input.\n") unless (defined($data) && $data ne "");
-
-
-# Filter out certain unneeded elements
-$data =~ s/<font[^>]*>//ig;
-$data =~ s/<\/font>//ig;
-$data =~ s/<\/?center>//ig;
-$data =~ s/<br>//ig;
-$data =~ s/&nbsp;/ /ig;
-
-### Get some general information
-my $otree = parse_html($data);
-if ($opt_dump) {
-  print Dumper(fnode($otree, "html"));
-  exit;
-}
-
-my %class = ();
-my $body = fnode($otree, "body");
-if (defined($body) && defined($$body{"nodes"})) {
-  foreach my $n (@{$$body{"nodes"}}) {
-    if ($$n{"name"} eq "text") {
-      push(@{$class{"info"}}, $$n{"text"});
-    }
-    elsif ($$n{"name"} eq "b") {
-      push(@{$class{"data"}}, $n);
-    }
-  }
-}
-
-# Filter out some more, for easier tree access during table parsing
-$data =~ s/<\/?b>//ig;
-my $tree = parse_html($data);
-my $node = fnode(fnode($tree, "body"), "table");
-die("No table element found in document. Perhaps the format has changed? :(\n") unless defined($node);
-
-### Parse through the HTML document node tree to find the data we need
-my $cid = 0;
-my $q = $$node{"nodes"};
-my $hourTimes = [];
-my $hourDefs = {};
-my $hourTable = {};
-my $hourFillTable = {};
-my $maxDays = 0;
-my $firstHour = 0;
-my $lastHour = 0;
-my $totalHours = 0;
-
-
-sub parseHourData($$)
+sub parse_hour_data($$)
 {
   my ($l, $rowspan) = @_;
   my $chours = $rowspan / 2; # The table is actually in half cells
@@ -359,7 +285,7 @@
   }
 }
 
-sub parseHour($)
+sub parse_hour_header($)
 {
   if ($_[0] =~ /(\d+):(\d+)/)
   {
@@ -430,6 +356,85 @@
   return join("", @out);
 }
 
+
+###
+### Main program
+###
+# Walk the command line: arguments starting with "-" are options, anything else is the input filename.
+while (defined(my $arg = shift)) {
+  if (substr($arg, 0, 1) eq "-") {
+    if ($arg =~ /^-($modes)$/o) {    # -php or -xml; /o interpolates $modes only once
+      $opt_mode = $1;
+    }
+    elsif ($arg eq "-dump") {
+      $opt_dump = 1;
+    }
+    elsif ($arg eq "-o") {
+      $opt_outfile = shift or die("Output filename option -o requires an argument.\n");    # next argument is the filename; a false value such as "0" also dies
+    } else {
+      die("Invalid option '$arg'.\n");
+    }
+  } else {
+    $opt_filename = $arg;    # a later bare argument overwrites an earlier one
+  }
+}
+# No input file named on the command line: print the usage text and terminate via die().
+die("Usage: $0 [options] <filename>
+
+  -php               Output a PHP include file with data in arrays (default)
+  -xml               Output a simple XML file.
+
+  -o <filename>	     Set output filename. Default is to use stdout.
+
+  -dump	             Dump HTML tree to stdout and quit.
+
+") unless defined($opt_filename);
+
+# Slurp the whole input file, decoding it as ISO-8859-1.
+my $data;
+open(my $fh, '<:encoding(iso-8859-1)', $opt_filename) or die("Error opening '$opt_filename': $!\n");
+$data = do { local $/; <$fh> };    # $/ is undef inside the block, so <$fh> returns the entire file
+close($fh);
+
+die("No data in input.\n") unless (defined($data) && $data ne "");
+
+
+# Filter out certain unneeded elements before parsing: <font>, <center> and <br> tags are dropped
+$data =~ s/<font[^>]*>//ig;
+$data =~ s/<\/font>//ig;
+$data =~ s/<\/?center>//ig;
+$data =~ s/<br>//ig;               # only the exact form <br>; NOTE(review): <br/> or <br ...> would be kept
+$data =~ s/&nbsp;/ /ig;            # non-breaking space entities become plain spaces
+
+### Get some general information
+my $otree = parse_html($data);      # parse_html()/fnode() are helpers not shown in this diff; parsed while <b> tags are still present
+if ($opt_dump) {
+  print Dumper(fnode($otree, "html"));    # -dump: print whatever fnode() returns for "html", then stop
+  exit;
+}
+
+my %class = ();                     # "info" => text strings, "data" => <b> nodes, taken from <body>'s child nodes
+my $body = fnode($otree, "body");
+if (defined($body) && defined($$body{"nodes"})) {
+  foreach my $n (@{$$body{"nodes"}}) {
+    if ($$n{"name"} eq "text") {
+      push(@{$class{"info"}}, $$n{"text"});
+    }
+    elsif ($$n{"name"} eq "b") {
+      push(@{$class{"data"}}, $n);    # the whole node is kept, not just its text
+    }
+  }
+}
+
+# Filter out some more, for easier tree access during table parsing
+$data =~ s/<\/?b>//ig;
+my $tree = parse_html($data);
+my $node = fnode(fnode($tree, "body"), "table");
+die("No table element found in document. Perhaps the format has changed? :(\n") unless defined($node);
+
+### Parse through the HTML document node tree to find the data we need
+# Child nodes of the table element; the loop below indexes into this list. This declaration was dropped when the main program was moved and must exist for that loop to see any rows.
+my $q = $$node{"nodes"};
 # Skip zero position this way (can't use foreach here)
 for (my $i = 1; $i < scalar(@{$q}); $i++)
 {
@@ -443,12 +448,12 @@
       {
         if ($$n{"args"} =~ /colspan=6\s+rowspan=(\d+)/)
         {
-          parseHourData($l, $1);
+          parse_hour_data($l, $1);
         }
         elsif ($$n{"args"} =~ /rowspan=2\s+align/)
         {
-          my $qstart = parseHour($$l[0]{"nodes"}[0]{"nodes"}[0]{"text"});
-          my $qend = parseHour($$l[1]{"nodes"}[0]{"nodes"}[0]{"text"});
+          my $qstart = parse_hour_header($$l[0]{"nodes"}[0]{"nodes"}[0]{"text"});
+          my $qend = parse_hour_header($$l[1]{"nodes"}[0]{"nodes"}[0]{"text"});
           if (defined($qstart) && defined($qend))
           {
             push(@$hourTimes, {"start" => $qstart, "end" => $qend});