changeset 1:21fde93375e9

Add beta code.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 11 Jan 2011 20:43:12 +0200
parents 02f0f79f98b6
children 2471bb891299
files beta.php fetchdata.pl luk.css
diffstat 3 files changed, 723 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/beta.php	Tue Jan 11 20:43:12 2011 +0200
@@ -0,0 +1,251 @@
+<?php
+// Page configuration.
+// The full open tag is used above: the short form only parses when the
+// short_open_tag ini setting is enabled.
+
+// Stylesheets and charset; not read in this file after this point
+// (presumably consumed by the page header helper -- TODO confirm).
+$pageCSS = array("http://tnsp.org/docs1.css", "luk.css");
+$pageCharset = "iso-8859-15";
+// Class ID used when none is given or the given one is invalid.
+$luokkaDefault = "TTE9SNO";
+// Not referenced elsewhere in this file.
+$mapFile = "kartta.png";
+// Opened only by the (not yet implemented) "tila" mode.
+$classFile = "luokkatilat.txt";
+// PHP include file holding the cached course descriptions.
+$cacheFile = "coursecache.txt";
+// Base for links generated by matchClass().
+$baseURI = "http://tnsp.org/luk/?";
+// Course information page; the course ID is appended by matchCourse().
+$infoURI = "http://www.oamk.fi/opiskelijalle/rakenne/opinto-opas/koulutusohjelmat/?sivu=oj&kieli=FI&opas=2010-2011&vuosi=10S11K&koodi1=";
+
+// Maximum number of day columns; trailing empty days are dropped later.
+$showDays = 6;
+$dayNames = array("Maanantai", "Tiistai", "Keskiviikko", "Torstai", "Perjantai", "Lauantai", "Sunnuntai");
+
+
+require "mcommon.inc.php";
+require "merrors.inc.php";
+
+/**
+ * Build the HTML label for one timetable row.
+ *
+ * Row 0 is labelled "8:15 - 9:00"; every following row is one hour later.
+ * The label is wrapped in line breaks for vertical padding.
+ */
+function getHour($hour)
+{
+  $from = $hour + 8;
+  $to = $hour + 9;
+  return "<br />{$from}:15 - {$to}:00<br /><br />";
+}
+
+/**
+ * Validate a class ID given by the user.
+ *
+ * The ID is later used to build the name of a file that gets require()d
+ * and is echoed into the page, so only a strict, fully anchored pattern is
+ * accepted: three capital letters, a digit, then letters, digits, "_" or "-".
+ *
+ * @param mixed $id  Class ID, passed by reference. Replaced with the
+ *                   default class ID when it is not valid.
+ * @return bool      TRUE if $id was valid, FALSE if it was replaced.
+ */
+function checkClassID(&$id)
+{
+  global $luokkaDefault;
+  // is_string() guards against array parameters such as ?luokka[]=x.
+  // The D modifier stops "$" from matching before a trailing newline.
+  if (!is_string($id) || !preg_match('#^[A-Z]{3}\d[A-Za-z0-9_-]+$#D', $id)) {
+    errorMsg("Virhe! Luokan täytyy olla muotoa <b>XXXnXXX</b>, käytetään vakioarvoa <b>".$luokkaDefault."</b>.");
+    $id = $luokkaDefault;
+    return FALSE;
+  } else
+    return TRUE;
+}
+
+
+// Check given parameters
+// The class ID is taken from the "luokka" GET parameter first, then from
+// the "lukluokka" cookie, and finally from the configured default.
+if (isset($_GET["luokka"])) {
+  $luokka = $_GET["luokka"];
+  // Only remember the class in a cookie when it passed validation.
+  if (checkClassID($luokka)) {
+    setcookie("lukluokka", $luokka, time() + 365*24*60*60); // expire in a year
+  }
+} else
+if (isset($_COOKIE["lukluokka"])) {
+  $luokka = $_COOKIE["lukluokka"];
+  // checkClassID() replaces an invalid cookie value with the default.
+  checkClassID($luokka);
+} else {
+  errorMsg("Luokkaa ei asetettu, käytetään vakioarvoa <b>".$luokkaDefault."</b>.");
+  $luokka = $luokkaDefault;
+}
+
+
+// Classroom ("tila") display mode is a stub: it prints a notice and exits.
+if (isset($_GET["tila"])) {
+  $tila = $_GET["tila"];
+  echo "Luokkatilan n&auml;ytt&ouml;moodi ei viel&auml; tuettu.<br />\n";
+  exit;
+
+  // NOTE(review): everything below is unreachable because of the exit
+  // above; it only opens and closes the classroom file.
+  $fp = @fopen($classFile, "rb");
+  if ($fp) {
+    fclose($fp);
+  }
+}
+
+  
+/**
+ * Return HTML for a course ID.
+ *
+ * Looks the course up in the global $cache. On a miss, the course info page
+ * ($infoURI followed by the ID) is fetched and its name and credit count
+ * are parsed and cached. $cacheDirty is set only when an entry was actually
+ * added, so an unchanged cache is not rewritten.
+ *
+ * @param string $id  Course ID.
+ * @return string     A link to the course page titled with the course
+ *                    name, or just the escaped ID when nothing is known.
+ */
+function matchCourse($id)
+{
+  global $cache, $infoURI, $cacheDirty;
+
+  // Check if course exists in cache
+  if (!isset($cache[$id])) {
+    // Not cached, try to fetch data
+    $data = @file_get_contents($infoURI.$id);
+    if ($data !== FALSE &&
+        preg_match("#<td class=\"smallheadercell\"><strong>(.+?)\s+(\d+)\s*op\s*</strong></td>#", $data, $m)) {
+      // Add data to cache and flag it for writing back to disk
+      $cache[$id] = array("desc" => $m[1], "op" => intval($m[2]));
+      $cacheDirty = TRUE;
+    }
+  }
+  
+  if (isset($cache[$id]))
+    return "<a target=\"_blank\" title=\"".htmlentities($id." - ".$cache[$id]["op"]." op").
+    "\" href=\"".htmlentities($infoURI.$id)."\">".htmlentities($cache[$id]["desc"])."</a>";
+  else
+    return htmlentities($id);
+}
+
+
+/**
+ * Turn a regex match into a bold classroom link followed by trailing text.
+ *
+ * $matches[1] is used as both the link target ("tila" parameter on
+ * $baseURI) and the link text; $matches[2] is appended after the link.
+ * Not called anywhere in this file; the argument shape suggests a
+ * preg_replace_callback() callback -- TODO confirm.
+ */
+function matchClass($matches)
+{
+  global $baseURI;
+  $room = $matches[1];
+  $rest = $matches[2];
+  return sprintf('<b><a href="%stila=%s">%s</a></b> %s', $baseURI, $room, $room, $rest);
+}
+
+
+// Global cache for course data
+$cache = array();
+$cacheDirty = FALSE;
+
+
+// Try to read cachefile, if we can get file lock on it
+$fp = @fopen($cacheFile, "rb");
+if ($fp) {
+  if (flock($fp, LOCK_SH)) { 
+    // The cache file is PHP code that assigns $cache
+    require($cacheFile);
+    flock($fp, LOCK_UN);
+  }
+  fclose($fp);
+}
+
+// A damaged cache file must not leave $cache in an unusable state
+if (!is_array($cache))
+  $cache = array();
+
+
+// Safe defaults, so that the page can still be rendered (and the error
+// list shown) when the class data file is missing. The data file
+// overwrites all three variables.
+$classInfo = array("general" => array(), "info" => array(), "maxdays" => 0, "maxhours" => 0);
+$classDefs = array();
+$classHourTable = array();
+
+if (file_exists($luokka.".data")) {
+  require($luokka.".data");
+} else {
+  errorMsg("Luokan ".htmlentities($luokka)." datatiedostoa ei löytynyt!");
+}
+
+// Sum up lesson hours, overall and per day.
+// $totalHours and $totalGrouped are not read again in this file.
+$totalHours = 0;
+$totalGrouped = 0;
+$dayHours = array();
+for ($day = 0; $day < $showDays; $day++) 
+  $dayHours[$day]["total"] = $dayHours[$day]["grouped"] = 0;
+
+foreach ($classDefs as $id => $data) {
+  $h = $data["hours"];
+  
+  $totalHours += $h;
+  $dayHours[$data["day"]]["total"] += $h;
+
+  // NOTE(review): the "grouped" counters are increased for lessons whose
+  // "grouped" flag is NOT set -- confirm this is the intended meaning.
+  if (!$data["grouped"]) {
+    $totalGrouped += $h;
+    $dayHours[$data["day"]]["grouped"] += $h;
+  }
+}
+
+// Drop trailing days without any lessons from the displayed columns.
+for ($day = $showDays - 1; $day >= 0; $day--) {
+  if ($dayHours[$day]["total"] == 0)
+    $showDays--;
+  else
+    break;
+}
+
+
+
+// Page header and title.
+// NOTE(review): $pageTitle is echoed without HTML escaping.
+$pageTitle = $luokka." / ".join("; ", $classInfo["info"]);
+printPageHeader($pageTitle);
+echo "<h1>".$pageTitle."</h1>\n".
+"<p>".join("; ", $classInfo["general"])."</p>\n";
+
+// Timetable header row: an empty corner cell plus one column per day.
+echo "<table class=\"timetable\">".
+" <tr>\n  <th></th>\n";
+for ($day = 0; $day < $showDays; $day++) {
+  echo  "  <th class=\"days\">".$dayNames[$day]."</th>\n";
+}
+echo " </tr>\n";
+
+// One table row per hour slot. $classHourTable[$hour][$day] holds a lesson
+// ID (key into $classDefs), 0 for an empty slot, or -1 for an empty slot
+// that an earlier row already covered with a rowspan.
+for ($hour = 0; $hour < $classInfo["maxhours"]; $hour++) {
+  echo " <tr>\n";
+  echo "  <th class=\"hours\">".getHour($hour)."</th>\n";
+  for ($day = 0; $day < $showDays; $day++) {
+    if (isset($classHourTable[$hour][$day])) {
+      $h = $classHourTable[$hour][$day];
+        
+      if ($h < 1) {
+        if ($h == 0) {
+          // Merge this and all directly following empty slots of the day
+          // into one cell, marking them -1 so later rows skip them.
+          $n = 0;
+          for ($i = $hour; $i < $classInfo["maxhours"]; $i++)
+          if ($classHourTable[$i][$day] == $h) {
+            $classHourTable[$i][$day] = -1;
+            $n++;
+          } else
+            break;
+          echo "  <td rowspan=\"".$n."\" class=\"clnothing\"></td>\n";
+        }
+      } else
+      if (isset($classDefs[$h])) {
+        // Each lesson is printed once, at its first slot, with a rowspan
+        // covering its length; the "done" flag suppresses later slots.
+        if (!isset($classDefs[$h]["done"])) {
+          $classDefs[$h]["done"] = true;
+          $i = $classDefs[$h];
+          $d = $classDefs[$h]["data"];
+          
+          // A second entry that looks like a course ID (a capital letter
+          // and six digits) means two parallel courses share the slot.
+          // NOTE(review): $d[1] is read without an isset() check.
+          $isSplit = preg_match("/^[A-Z]\d{6}$/", $d[1]);
+          
+          echo "  <td rowspan=\"".$i["hours"].
+          "\" class=\"".($isSplit || $i["grouped"] ? "clgrouped" : "clnormal")."\">";
+          
+          if ($isSplit) {
+            // Two-column inner table: courses in the first row, then the
+            // remaining entries in pairs.
+            echo "<table>".
+            "<tr><td>".matchCourse($d[0])."</td><td>".matchCourse($d[1])."</td></tr>";
+            for ($j = 2; $j < count($d); $j += 2)
+              echo "<tr><td>".htmlentities($d[$j])."</td><td>".(isset($d[$j+1]) ? htmlentities($d[$j+1]) : "")."</td></tr>";
+            echo "</table>";
+          } else {
+            // Single course: course link, then the remaining entries on
+            // their own lines.
+            echo matchCourse($d[0])."<br />";
+            for ($j = 1; $j < count($d); $j++)
+              echo htmlentities($d[$j])."<br />";
+          }
+          echo "</td>\n";
+        }
+      } else
+        errorMsg("Internal error cell $hour / $day : hour id $h does not exist!");
+    } else
+      errorMsg("Internal error, cell $hour / $day does not exist.");
+  }
+  echo " </tr>\n";
+}
+
+// Footer row with the per-day hour totals.
+echo " <tr>\n  <td>Tunteja (<b>ryhmä</b>)</td>\n";
+for ($day = 0; $day < $showDays; $day++) {
+  echo  "  <td>".$dayHours[$day]["total"]."h (<b>".$dayHours[$day]["grouped"]."h</b>)</td>\n";
+}
+echo " </tr>\n";
+
+echo "</table>";
+
+
+// Show error messages
+// ($errorSet and $errorMsgs are not defined in this file; presumably they
+// are maintained by errorMsg() -- TODO confirm.)
+if ($errorSet) {
+  echo "<ul>\n";
+  foreach ($errorMsgs as $msg)
+    echo "<li>$msg</li>\n";
+  echo "</ul>\n";
+}
+
+printPageFooter();
+
+
+// Dump the course data cache, but only if it has changed
+if ($cacheDirty) {
+  // Open for reading and writing without truncating, so the old contents
+  // stay intact until the exclusive lock is held.
+  $fp = @fopen($cacheFile, "rb+");
+
+  // If file didn't exist, try write mode
+  if (!$fp)
+    $fp = @fopen($cacheFile, "wb");
+
+  if ($fp) {
+    // Use locking to prevent concurrent access and dump data
+    if (flock($fp, LOCK_EX)) {
+      ftruncate($fp, 0);
+      // The cached descriptions come from a remote web page. var_export()
+      // writes them as properly quoted PHP literals, so they cannot be
+      // interpolated or executed when the cache file is included again.
+      // The closing tag is left out of the generated file on purpose.
+      fwrite($fp, "<?php\n\$cache = ".var_export($cache, TRUE).";\n");
+      fflush($fp);
+      flock($fp, LOCK_UN);
+    }
+    fclose($fp);
+  }
+}
+
+?>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fetchdata.pl	Tue Jan 11 20:43:12 2011 +0200
@@ -0,0 +1,438 @@
+#!/usr/bin/perl -w
+#
+# Fetch and parse HTML format class timetable into more sane formats
+# (C) Copyright 2010-2010 Matti Hämäläinen <ccr@tnsp.org>
+#
+use strict;
+use Data::Dumper;
+use LWP::UserAgent;
+use HTML::Entities;
+
+my $userAgent = "Lukkari/0.7";
+
+
+# URL-encode a string: every character other than ASCII letters, digits,
+# underscore and space becomes %XX, and spaces become "+".
+# Returns the encoded copy; the argument is not modified.
+sub urlencode($)
+{
+  my $value = $_[0];
+  # Always emit two hex digits, so that bytes below 0x10 (e.g. newline)
+  # become a valid escape like %0A instead of %A.
+  $value =~ s/([^a-zA-Z_0-9 ])/sprintf("%%%02X", unpack("C", $1))/eg;
+  $value =~ tr/ /+/;
+  return $value;
+}
+
+
+# Return a copy of the string with leading and trailing whitespace removed.
+# An undefined argument is passed through as undef.
+sub str_trim($)
+{
+  my ($text) = @_;
+  return $text unless defined($text);
+
+  $text =~ s/^\s+//;
+  $text =~ s/\s+$//;
+  return $text;
+}
+
+
+# Remove and return the first token of the token list (an array reference),
+# exactly as stored. Returns undef when the list is empty.
+sub pop_token_a($)
+{
+  my ($queue) = @_;
+  my $head = shift(@{$queue});
+  return $head;
+}
+
+
+# Remove and return the first token of the token list, with surrounding
+# whitespace trimmed. Returns undef when the list is empty.
+sub pop_token($)
+{
+  my $raw = pop_token_a($_[0]);
+  return str_trim($raw);
+}
+
+
+# Read a double-quoted string from the token list.
+# The first token must start with a double quote; tokens are then consumed
+# and concatenated until one ends with a double quote.
+# Returns the string without the quotes, or undef if the list is empty, the
+# first token is not quoted, or no closing quote is found.
+sub parse_html_str($)
+{
+  my $tokens = $_[0];
+  # pop_token() already trims the token, and returns undef on an empty list
+  my $token = pop_token($tokens);
+  my $str = "";
+
+  return undef unless (defined($token) && substr($token, 0, 1) eq '"');
+  $token = substr($token, 1);
+  
+  while (defined($token)) {
+    if (substr($token, -1) eq '"') {
+      $str .= substr($token, 0, -1);
+      return $str;
+    } else {
+      $str .= $token;
+    }
+    # Following tokens are taken as they are, without trimming
+    $token = shift(@$tokens);
+  }
+  return undef;
+}
+
+
+sub parse_html_tree($$);
+
+# Recursively build a node tree from the token list.
+#   $tokens - array reference of tag and text tokens; consumed while parsing
+#   $tree   - hash reference of the current node; children are pushed to
+#             its "nodes" array
+# Element nodes get "name" and "args" (the rest of the lowercased tag),
+# text nodes get the name "text" and the entity-decoded "text".
+# Returns $tree when the closing tag of the current level is met or the
+# tokens run out. Dies on a tag that cannot be understood.
+sub parse_html_tree($$)
+{
+  my ($tokens, $tree) = @_;
+
+  # Test with defined(): a text token of "0" is false, and would otherwise
+  # end parsing in the middle of the document.
+  while (defined(my $token = pop_token($tokens))) {
+    if ($token =~ /^<[!\/]?[a-zA-Z]+/) {
+      $token = lc($token);
+      if ($token =~ /^<\!.*>$/) {
+        # Ignore comments etc.
+      } elsif ($token =~ /^<([a-z]+)(.*)>$/) {
+        my ($name, $args) = ($1, $2);
+        # meta and img have no closing tag and are not stored in the tree:
+        # just carry on with the next token on the same level.
+        next if ($name eq "meta" || $name eq "img");
+
+        my $tmp = { "name" => $name, "args" => str_trim($args) };
+        parse_html_tree($tokens, $tmp);
+        push(@{$$tree{"nodes"}}, $tmp);
+      } elsif ($token =~ /^<\/([a-z]+)>$/) {
+        # Any closing tag ends the current level; the name is not checked
+        return $tree;
+      } else {
+        die("HORROR TERROR ELITE: $token\n");
+      }
+    } else {
+      $token = str_trim(decode_entities($token));
+      push(@{$$tree{"nodes"}}, { "name" => "text", "args" => "", "text" => $token }) if length($token) > 0;
+    }
+  }
+  
+  return $tree;
+}
+
+
+# Parse an HTML document string into a node tree.
+# Line breaks are turned into spaces, the text is split into tag and text
+# tokens (whitespace-only tokens are dropped), and the tokens are handed to
+# parse_html_tree(). Returns the root node (empty "name" and "args"), or
+# undef if the argument is undefined.
+sub parse_html($)
+{
+  my ($html) = @_;
+  return undef unless defined($html);
+
+  (my $flat = $html) =~ tr/\r\n/  /;
+  my @pieces = split(/(<\/?[a-zA-Z]+.*?>)/, $flat);
+  my @tokens = grep { $_ !~ /^\s*$/ } @pieces;
+
+  my $root = { "name" => "", "args" => "" };
+  parse_html_tree(\@tokens, $root);
+  return $root;
+}
+
+
+# Fetch the given URL with a HTTP GET request, using a 10 second timeout
+# and the global $userAgent string.
+# Returns the response content on success. On failure prints the status
+# code and message to STDERR and returns undef.
+sub http_fetch($)
+{
+  my $url = $_[0];
+
+  my $client = LWP::UserAgent->new;
+  $client->agent($userAgent);
+  $client->timeout(10);
+
+  my $request = HTTP::Request->new(GET => $url);
+  $request->user_agent($userAgent);
+  my $response = $client->request($request);
+
+  return $response->content if ($response->is_success);
+
+  print STDERR "HTTP request failed: [".$response->code."] ".$response->message."\n";
+  return undef;
+}
+
+
+sub html_find_node($$$);
+
+# Depth-first search for the first node with the given name.
+#   $node - a node (hash reference), a list of nodes (array reference),
+#           or undef
+#   $name - node name to look for
+#   $args - if not empty, a regular expression the "args" of the node
+#           must match
+# Returns the node, or undef if nothing was found. A node that has the
+# right name but fails the $args match ends the search of that branch:
+# its children are not examined.
+sub html_find_node($$$)
+{
+  my ($node, $name, $args) = @_;
+  return undef unless defined($node);
+
+  my $type = ref($node);
+  if ($type eq "ARRAY") {
+    # First hit among the list members wins
+    foreach my $member (@$node) {
+      my $found = html_find_node($member, $name, $args);
+      return $found if defined($found);
+    }
+    return undef;
+  }
+
+  return undef unless ($type eq "HASH" && defined($$node{"name"}));
+
+  # Some other element: look among its children
+  return html_find_node($$node{"nodes"}, $name, $args) if ($$node{"name"} ne $name);
+
+  # Name matches; check the attributes only when asked to
+  return $node if ($args eq "");
+  return (defined($$node{"args"}) && $$node{"args"} =~ /$args/) ? $node : undef;
+}
+
+
+# Find the first node with the given name, ignoring node attributes.
+# Shorthand for html_find_node() with an empty attribute pattern.
+sub fnode($$)
+{
+  my ($node, $name) = @_;
+  return html_find_node($node, $name, "");
+}
+
+
+# Find the first node with the given name whose attributes match the given
+# pattern. Shorthand for html_find_node().
+sub fnodea($$$)
+{
+  my ($node, $name, $args) = @_;
+  return html_find_node($node, $name, $args);
+}
+
+
+# Escape a string for use inside a double-quoted PHP string literal.
+# Backslash, double quote and dollar sign are the characters that have a
+# special meaning there. A single quote needs no escaping, and a backslash
+# in front of it would end up in the PHP string.
+# Returns the escaped copy; the argument is not modified.
+sub escape($)
+{
+  my $s = $_[0];
+  $s =~ s/([\\\$"])/\\$1/g;
+  return $s;
+}
+
+
+sub html_collapse($$);
+
+# Turn a node tree back into a string.
+#   $node  - node (hash reference) to start from
+#   $strip - if true, only the text content is returned; otherwise each
+#            element is wrapped in <name>...</name> (attributes are not
+#            written out)
+sub html_collapse($$)
+{
+  my ($node, $strip) = @_;
+  my $tag = $$node{"name"};
+
+  # A text node is a leaf
+  return "".$$node{"text"} if ($tag eq "text");
+
+  my ($open, $close) = $strip ? ("", "") : ("<".$tag.">", "</".$tag.">");
+
+  my $out = $open;
+  foreach my $child (@{$$node{"nodes"}}) {
+    $out .= html_collapse($child, $strip);
+  }
+  return $out.$close;
+}
+
+
+###
+### Main program
+###
+# Output modes accepted as -<mode> options, as a regex alternation
+my $modes = "simple|php|xml";
+my $opt_mode = "php";
+my $opt_dump = 0;
+my $opt_filename;
+my $opt_outfile;
+
+# Go through the command line. Arguments starting with "-" are options,
+# anything else sets the input filename or URI (the last one given wins).
+while (defined(my $arg = shift)) {
+  if (substr($arg, 0, 1) eq "-") {
+    if ($arg =~ /^-($modes)$/o) {
+      $opt_mode = $1;
+    }
+    elsif ($arg eq "-dump") {
+      $opt_dump = 1;
+    }
+    elsif ($arg eq "-o") {
+      # The next argument is the output filename
+      $opt_outfile = shift or die("Output filename option -o requires an argument.\n");
+    } else {
+      die("Invalid option '$arg'.\n");
+    }
+  } else {
+    $opt_filename = $arg;
+  }
+}
+
+# Show usage and quit, if no input was specified
+die("Usage: $0 [options] <filename|URI>
+
+  -php               Output a PHP include file with data in arrays (default)
+  -simple            Output simple tabled output for easy parsing.
+  -xml               Output XML.
+
+  -o <filename>	     Set output filename. Default is to use stdout.
+
+  -dump	             Dump HTML tree to stdout and quit.
+
+") unless defined($opt_filename);
+
+
+# Get the whole input document: over HTTP(S) if the argument looks like
+# an URI, otherwise from a local file.
+my $data;
+if ($opt_filename =~ /^(http|https):/) {
+  $data = http_fetch($opt_filename) or die("Could not fetch: $opt_filename\n");
+} else {
+  open(my $fh, '<', $opt_filename) or die("Error opening '$opt_filename': $!\n");
+  # Undefined input record separator: read the file in one go
+  $data = do { local $/; <$fh> };
+  close($fh);
+}
+
+die("No data in input.\n") unless (defined($data) && $data ne "");
+
+
+# Filter out certain unneeded elements
+# (only the plain "<br>" form of line break is removed)
+$data =~ s/<font[^>]*>//ig;
+$data =~ s/<\/font>//ig;
+$data =~ s/<\/?center>//ig;
+$data =~ s/<br>//ig;
+$data =~ s/&nbsp;/ /ig;
+
+### Get some general information
+my $otree = parse_html($data);
+if ($opt_dump) {
+  print Dumper(fnode($otree, "html"));
+  exit;
+}
+
+# From the direct children of <body>, collect text nodes as "info" strings
+# and <b> elements as "data" nodes.
+my %class = ();
+my $body = fnode($otree, "body");
+if (defined($body) && defined($$body{"nodes"})) {
+  foreach my $n (@{$$body{"nodes"}}) {
+    if ($$n{"name"} eq "text") {
+      push(@{$class{"info"}}, $$n{"text"});
+    }
+    elsif ($$n{"name"} eq "b") {
+      push(@{$class{"data"}}, $n);
+    }
+  }
+}
+
+# Filter out some more, for easier tree access during table parsing
+# The document is parsed a second time, now without the <b> elements.
+$data =~ s/<\/?b>//ig;
+my $tree = parse_html($data);
+my $node = fnode(fnode($tree, "body"), "table");
+die("No table element found in document. Perhaps the format has changed? :(\n") unless defined($node);
+
+
+### Parse through the HTML document node tree to find the data we need
+# $tunnit: lesson ID => { grouped, day, start, hours, data }
+# $taulu:  row => column => lesson ID (0 for a cell without any text)
+my $id = 0;
+my $q = $$node{"nodes"};
+my $tunnit = {};
+my $taulu = {};
+my $maxdays = 6;
+my $maxhours = 0;
+
+# Skip zero this way
+# (the loop starts from index 1, so the first child of the table is never
+# examined)
+for (my $i = 1; $i < scalar(@{$q}); $i++) {
+  my $d = $$q[$i]{"nodes"};
+  if (defined($d)) {
+    foreach my $n (@{$d}) {
+      # Only cells that have "colspan=6 rowspan=N" attributes and a nested
+      # element with children are handled
+      my $l = $$n{"nodes"}[0]{"nodes"};
+      if (defined($l) && $$n{"args"} =~ /colspan=6\s+rowspan=(\d+)/) {
+        # Length of the lesson is half of the rowspan (presumably two
+        # table rows make up one hour -- TODO confirm)
+        my $tuntia = $1 / 2;
+        my $data = [];
+        my $grouped = 0;
+        # Collect the text of the first child of every element two levels
+        # further down
+        foreach my $h (@{$l}) {
+          if (defined($$h{"nodes"})) {
+            foreach my $b (@{$$h{"nodes"}}) {
+              if (defined($$b{"nodes"})) {
+                # NOTE(review): "text" is undefined if the first child is
+                # not a text node
+                my $text = $$b{"nodes"}[0]{"text"};
+                $text =~ s/\.$//;
+
+                $grouped = 1 if ($text =~ /vuorov/);
+
+                push(@$data, $text);
+              }
+            }
+          }
+        }
+
+        # Cells with text get a new lesson ID, others are marked with zero
+        my $tid;
+        if (scalar(@$data) > 0) {
+          $id++;
+          $tid = $id;
+        } else {
+          $tid = 0;
+        }
+
+        # The cell belongs to the first column of this row that is still
+        # free (column 0 if none are free)
+        my $tpd = 0;
+        for (my $x = 0; $x < $maxdays; $x++) {
+          if (!defined($$taulu{$maxhours}{$x})) {
+            $tpd = $x;
+            last;
+          }
+        }
+        # Reserve the column for the whole length of the lesson
+        for (my $t = 0; $t < $tuntia; $t++) {
+          $$taulu{$maxhours + $t}{$tpd} = $tid;
+        }
+        
+        if (scalar(@$data) > 0) {
+          # Grouped, if there is another class ID in second slot
+          # NOTE(review): $$data[1] is undefined if there is only one entry
+          $grouped = 1 if ($$data[1] =~ /^[A-Z]\d{6}$/);
+          $$tunnit{$id} = { "grouped" => $grouped, "day" => $tpd, "start" => $maxhours, "hours" => $tuntia, "data" => $data };
+        }
+      }
+    }
+    # Every table child that has child nodes counts as one row
+    $maxhours++;
+  }
+}
+
+
+### Open output file, if specified
+if (defined($opt_outfile)) {
+  open(STDOUT, '>', $opt_outfile) or die("Could not open output file '$opt_outfile'.\n");
+}
+
+
+### Output data in desired format
+if ($opt_mode eq "php") {
+  # PHP include file defining $classInfo, $classDefs and $classHourTable.
+  # The full open tag is used, so that the file parses regardless of the
+  # short_open_tag setting of the PHP installation.
+  print "<?php\n".
+  "\$classInfo = array(\n".
+  "  \"general\" => array(".join(", ", map { "\"".escape($_)."\""; } @{$class{"info"}})."),\n".
+  "  \"info\" => array(".join(", ", map { "\"".escape(html_collapse($_, 1))."\""; } @{$class{"data"}})."),\n".
+  "  \"info_tags\" => array(".join(", ", map { "\"".escape(html_collapse($_, 0))."\""; } @{$class{"data"}})."),\n".
+  "  \"maxdays\" => $maxdays,\n".
+  "  \"maxhours\" => $maxhours,\n".
+  ");\n\n";
+
+  print "\$classDefs = array(\n";
+  foreach my $id (sort { $a <=> $b } keys %{$tunnit}) {
+    print "  $id => array(";
+    # Sorted keys keep the generated file identical between runs
+    foreach my $key (sort keys %{$$tunnit{$id}}) {
+      my $value = $$tunnit{$id}{$key};
+      print "\"$key\" => ";
+      if (ref($value) eq "ARRAY") {
+        print "array(".join(", ", map { "\"".escape($_)."\""; } @$value).")";
+      }
+      elsif ($value =~ /^\d+$/) {
+        # Plain integers are written without quotes
+        print $value;
+      } else {
+        print "\"".escape($value)."\"";
+      }
+      print ", ";
+    }
+    print "),\n";
+  }
+
+  print ");\n".
+  "\n".
+  "\$classHourTable = array(\n";
+  for (my $y = 0; $y < $maxhours; $y++) {
+    my $str = "";
+    for (my $x = 0; $x < $maxdays; $x++) {
+      # Cells that were never filled in are written as 0
+      my $cell = $$taulu{$y}{$x};
+      $str .= ", " unless ($str eq "");
+      $str .= sprintf("%3d", defined($cell) ? $cell : 0);
+    }
+    print "  array(".$str."),\n";
+  }
+  print ");\n?>\n";
+}
+
+elsif ($opt_mode eq "xml") {
+  # XML output only contains the general class information so far
+  print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n".
+  "<timetable>\n".
+  " <class>\n".
+  "  <general>".join("", map { "<node>".encode_entities($_)."</node>"; } @{$class{"info"}})."</general>\n".
+  "  <info>".join("", map { "<node>".encode_entities(html_collapse($_, 1))."</node>"; } @{$class{"data"}})."</info>\n".
+  "  <maxdays>$maxdays</maxdays>\n".
+  "  <maxhours>$maxhours</maxhours>\n".
+  " </class>\n";
+
+
+  print "</timetable>\n";
+}
+
+
+
+elsif ($opt_mode eq "simple") {
+  # Plain table of lesson IDs, one line per row
+  for (my $y = 0; $y < $maxhours; $y++) {
+    for (my $x = 0; $x < $maxdays; $x++) {
+      my $cell = $$taulu{$y}{$x};
+      printf("%3d ", defined($cell) ? $cell : 0);
+    }
+    print "\n";
+  }
+}
+
+# Errors of buffered writes (e.g. disk full) only show up on close
+close(STDOUT) or die("Error writing output: $!\n");
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/luk.css	Tue Jan 11 20:43:12 2011 +0200
@@ -0,0 +1,34 @@
+/* Base font size of the timetable */
+table.timetable {
+	font-size: 10pt;
+}
+
+/* Links directly inside table cells: white, shadowed, not underlined */
+td > a, td > a:visited, td > a:active {
+	text-decoration: none;
+	text-shadow: 2px 2px 2px #000;
+	color: #fff;
+}
+
+td > a:hover {
+	color: yellow;
+}
+
+
+/* Keep the hour labels on one line */
+th.hours {
+	white-space: nowrap;
+}
+
+/* Tables nested inside the timetable fill their container */
+table.timetable * table {
+	width: 100%;
+	height: 100%;
+}
+
+/* Cells of class "clgrouped", and cells of tables nested in them */
+td.clgrouped, td.clgrouped * td {
+	background: #454;
+}
+
+/* Cells of class "clnothing" */
+td.clnothing {
+	background: #222;
+}
+
+/* Cells of class "clnormal": no special styling */
+td.clnormal {
+}