changeset 57:93c87f42c803

New parser output format, parser logic cleaned up a bit.
author Matti Hamalainen <ccr@tnsp.org>
date Wed, 24 Oct 2012 01:18:45 +0300
parents 5adf3ec26aa0
children 119f0cef6498
files parsedata.pl
diffstat 1 files changed, 111 insertions(+), 59 deletions(-) [+]
line wrap: on
line diff
--- a/parsedata.pl	Wed Oct 24 01:18:24 2012 +0300
+++ b/parsedata.pl	Wed Oct 24 01:18:45 2012 +0300
@@ -272,10 +272,78 @@
 ### Parse through the HTML document node tree to find the data we need
 my $cid = 0;
 my $q = $$node{"nodes"};
+my $hourTimes = [];
 my $hourDefs = {};
 my $hourTable = {};
+my $hourFillTable = {};
 my $maxDays = 0;
-my $maxHours = 0;
+my $firstHour = 0;
+my $lastHour = 0;
+my $totalHours = 0;
+
+
+sub parseHourData($$) # Record one class/hour table cell into the global hour tables
+{ # Args: $l = array ref of child nodes of the cell, $rowspan = the cell's rowspan value
+  my ($l, $rowspan) = @_;
+  my $chours = $rowspan / 2; # The table is actually in half cells, so two rows make one hour
+  my $cdata = []; # Text entries collected from the cell
+  my $cgrouped = 0; # Set to 1 when the cell looks like a grouped class
+
+  # Pull in data for the class/hour cell: the text of the first child of
+  foreach my $h (@{$l}) { # each second-level node that has children
+    if (defined($$h{"nodes"})) {
+      foreach my $b (@{$$h{"nodes"}}) {
+        if (defined($$b{"nodes"})) {
+          my $text = $$b{"nodes"}[0]{"text"};
+          $text =~ s/\.$//; # Drop a single trailing period
+
+          $cgrouped = 1 if ($text =~ /vuorov/); # presumably an "alternating weeks" marker -- TODO confirm
+
+          push(@$cdata, $text);
+        }
+      }
+    }
+  }
+
+  # Increment the class ID if there is data in this class/hour cell
+  my $tid; # ID stored in the fill table; 0 marks an empty cell
+  if (scalar(@$cdata) > 0) {
+    $cid++;
+    $tid = $cid;
+  } else {
+    $tid = 0;
+  }
+
+  # Determine current day: the first day column not yet filled on row $lastHour
+  my $cday = 0; # Stays 0 if all seven columns are already defined
+  for (my $x = 0; $x < 7; $x++) {
+    if (!defined($$hourFillTable{$lastHour}{$x})) {
+      $cday = $x;
+      last;
+    }
+  }
+  for (my $t = 0; $t < $chours; $t++) { # Mark every row this cell covers for that day
+    $$hourFillTable{$lastHour + $t}{$cday} = $tid;
+  }
+  # Only cells that held data are recorded as classes
+  if ($tid) {
+    $maxDays = $cday + 1 if ($cday + 1 > $maxDays); # Day count is the highest used day index plus one
+
+    # Grouped, if there is another class ID in second slot
+    $cgrouped = 1 if ($$cdata[1] =~ /^[A-Z]\d{6}$/); # NOTE(review): $$cdata[1] is undef when the cell has a single entry
+    $$hourDefs{$cid} = { "grouped" => $cgrouped, "start" => $lastHour, "hours" => $chours, "data" => $cdata }; # $cid equals $tid here
+    push(@{$$hourTable{$cday}}, $tid); # Per-day list of class IDs, in encounter order
+    $totalHours += $chours;
+  }
+}
+
+sub parseHour($) # Convert the first "N:M" digit pair found in the argument to a number
+{ # Returns (N * 60 + M) * 60, or undef when no such pair is found
+  if ($_[0] =~ /(\d+):(\d+)/) {
+    return ((int($1) * 60 + int($2)) * 60); # presumably hours:minutes, giving seconds since midnight -- TODO confirm
+  }
+  return undef; # No match; callers test the result with defined()
+}
 
 # Skip zero position this way (can't use foreach here)
 for (my $i = 1; $i < scalar(@{$q}); $i++) {
@@ -283,67 +351,43 @@
   if (defined($d)) {
     foreach my $n (@{$d}) {
       my $l = $$n{"nodes"}[0]{"nodes"};
-      if (defined($l) && $$n{"args"} =~ /colspan=6\s+rowspan=(\d+)/) {
-        my $chours = $1 / 2; # The table is actually in half cells
-        my $cdata = [];
-        my $cgrouped = 0;
-
-        foreach my $h (@{$l}) {
-          if (defined($$h{"nodes"})) {
-            foreach my $b (@{$$h{"nodes"}}) {
-              if (defined($$b{"nodes"})) {
-                my $text = $$b{"nodes"}[0]{"text"};
-                $text =~ s/\.$//;
-
-                $cgrouped = 1 if ($text =~ /vuorov/);
-
-                push(@$cdata, $text);
-              }
-            }
-          }
+      if (defined($l))
+      {
+        if ($$n{"args"} =~ /colspan=6\s+rowspan=(\d+)/) {
+          parseHourData($l, $1);
         }
-
-        my $tid;
-        if (scalar(@$cdata) > 0) {
-          $cid++;
-          $tid = $cid;
-        } else {
-          $tid = 0;
-        }
-
-        my $cday = 0;
-        for (my $x = 0; $x < 7; $x++) {
-          if (!defined($$hourTable{$maxHours}{$x})) {
-            $cday = $x;
-            $maxDays = $x if ($x > $maxDays);
-            last;
+        elsif ($$n{"args"} =~ /rowspan=2\s+align/) {
+          my $qstart = parseHour($$l[0]{"nodes"}[0]{"nodes"}[0]{"text"});
+          my $qend = parseHour($$l[1]{"nodes"}[0]{"nodes"}[0]{"text"});
+          if (defined($qstart) && defined($qend)) {
+            push(@$hourTimes, {"start" => $qstart, "end" => $qend});
           }
         }
-        for (my $t = 0; $t < $chours; $t++) {
-          $$hourTable{$maxHours + $t}{$cday} = $tid;
-        }
-        
-        if (scalar(@$cdata) > 0) {
-          # Grouped, if there is another class ID in second slot
-          $cgrouped = 1 if ($$cdata[1] =~ /^[A-Z]\d{6}$/);
-          $$hourDefs{$cid} = { "grouped" => $cgrouped, "day" => $cday, "start" => $maxHours, "hours" => $chours, "data" => $cdata };
-        }
       }
     }
-    $maxHours++;
+    $lastHour++;
   }
 }
 
 
 ### Go through hour table, find last day and hour of the week, crop
 my $flag = 1;
-for (my $y = $maxHours - 1; $y >= 0 && $flag; $y--) {
+for (my $y = 0; $y < $lastHour && $flag; $y++) {
   for (my $x = 0; $x < $maxDays && $flag; $x++) {
-    $flag = 0 if (defined($$hourTable{$y}{$x}) && $$hourTable{$y}{$x} != 0);
+    $flag = 0 if (defined($$hourFillTable{$y}{$x}) && $$hourFillTable{$y}{$x} != 0);
   }
-  $maxHours-- if ($flag);
+  $firstHour++ if ($flag);
 }
 
+$flag = 1;
+for (my $y = $lastHour - 1; $y >= 0 && $flag; $y--) {
+  for (my $x = 0; $x < $maxDays && $flag; $x++) {
+    $flag = 0 if (defined($$hourFillTable{$y}{$x}) && $$hourFillTable{$y}{$x} != 0);
+  }
+  $lastHour-- if ($flag);
+}
+
+
 ### Open output file, if specified
 if (defined($opt_outfile)) {
   open(STDOUT, '>', $opt_outfile) or die("Could not open output file '$opt_outfile'.\n");
@@ -358,10 +402,18 @@
   "  \"general\" => array(".join(", ", map { "\"".escape($_)."\""; } @{$class{"info"}})."),\n".
   "  \"info\" => array(".join(", ", map { "\"".escape(html_collapse($_, 1))."\""; } @{$class{"data"}})."),\n".
   "  \"tags\" => array(".join(", ", map { "\"".escape(html_collapse($_, 0))."\""; } @{$class{"data"}})."),\n".
-  "  \"maxdays\" => $maxDays,\n".
-  "  \"maxhours\" => $maxHours,\n".
+  "  \"maxDays\" => $maxDays,\n".
+  "  \"firstHour\" => $firstHour,\n".
+  "  \"lastHour\" => $lastHour,\n".
+  "  \"totalHours\" => $totalHours\n".
   ");\n\n";
 
+  print "\$classHourTimes = array(\n";
+  foreach my $chour (@$hourTimes) {
+    print "  array(\"start\" => ".$$chour{"start"}.", \"end\" => ".$$chour{"end"}."),\n";
+  }
+  print ");\n\n";
+
   print "\$classHourDefs = array(\n";
   foreach my $cid (sort { $a <=> $b } keys %{$hourDefs}) {
     print "  $cid => array(";
@@ -380,17 +432,16 @@
     }
     print "),\n";
   }
+  print ");\n\n";
 
-  print ");\n".
-  "\n".
-  "\$classHourTable = array(\n";
-  for (my $y = 0; $y < $maxHours; $y++) {
-    my $str = "";
-    for (my $x = 0; $x < $maxDays; $x++) {
-      $str .= ", " unless ($str eq "");
-      $str .= sprintf "%3d", $$hourTable{$y}{$x};
+  print
+  "\$classDayTable = array(\n";
+  for (my $y = 0; $y < $maxDays; $y++)
+  {
+    if (defined($$hourTable{$y}))
+    {
+      print "  $y => array(".join(", ", @{$$hourTable{$y}})."),\n";
     }
-    print "  array(".$str."),\n";
   }
   print ");\n?>\n";
 }
@@ -402,7 +453,8 @@
   "  <general>".join("", map { "<node>".encode_entities($_)."</node>"; } @{$class{"info"}})."</general>\n".
   "  <info>".join("", map { "<node>".encode_entities(html_collapse($_, 1))."</node>"; } @{$class{"data"}})."</info>\n".
   "  <maxdays>$maxDays</maxdays>\n".
-  "  <maxhours>$maxHours</maxhours>\n".
+  "  <firsthour>$firstHour</firsthour>\n".
+  "  <lasthour>$lastHour</lasthour>\n".
   " </class>\n";