Mercurial > hg > lukkari
changeset 1:21fde93375e9
Add beta code.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 11 Jan 2011 20:43:12 +0200 |
parents | 02f0f79f98b6 |
children | 2471bb891299 |
files | beta.php fetchdata.pl luk.css |
diffstat | 3 files changed, 723 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<?php
//
// Lukkari (beta): renders the timetable of one class as an HTML table.
//
// Inputs:
//   - GET parameter "luokka" or cookie "lukluokka": the class ID.
//   - "<class ID>.data": a PHP include defining $classInfo, $classDefs and
//     $classHourTable (the PHP output mode of fetchdata.pl writes this format).
//   - $cacheFile: a PHP include defining $cache (course ID => description
//     and credit points), rewritten at the end of the request if it changed.
//
// errorMsg(), printPageHeader(), printPageFooter(), $errorSet and $errorMsgs
// are not defined here; they are expected to come from the two required
// include files below.
//
$pageCSS = array("http://tnsp.org/docs1.css", "luk.css");
$pageCharset = "iso-8859-15";
$luokkaDefault = "TTE9SNO";
$mapFile = "kartta.png";
$classFile = "luokkatilat.txt";
$cacheFile = "coursecache.txt";
$baseURI = "http://tnsp.org/luk/?";
$infoURI = "http://www.oamk.fi/opiskelijalle/rakenne/opinto-opas/koulutusohjelmat/?sivu=oj&kieli=FI&opas=2010-2011&vuosi=10S11K&koodi1=";

$showDays = 6;
$dayNames = array("Maanantai", "Tiistai", "Keskiviikko", "Torstai", "Perjantai", "Lauantai", "Sunnuntai");


require "mcommon.inc.php";
require "merrors.inc.php";


// Returns the HTML label for a timetable row; row 0 is 8:15 - 9:00.
function getHour($hour)
{
  return "<br />".($hour + 8).":15 - ".($hour + 9).":00<br /><br />";
}


// Validates a class ID in place.
//
// On failure an error message is queued, $id is replaced with the default
// class and FALSE is returned; otherwise TRUE is returned.
//
// The ID is later used to build the name of a file passed to require() and
// is printed into the page, so only letters, digits, "_" and "-" are
// accepted after the "XXXn" prefix and the whole string must match.
function checkClassID(&$id)
{
  global $luokkaDefault;
  if (!is_string($id) || !preg_match("#^[A-Z]{3}\d[A-Za-z0-9_-]+\z#", $id)) {
    errorMsg("Virhe! Luokan täytyy olla muotoa <b>XXXnXXX</b>, käytetään vakioarvoa <b>".$luokkaDefault."</b>.");
    $id = $luokkaDefault;
    return FALSE;
  } else
    return TRUE;
}


// Check given parameters: GET parameter first, then cookie, then default.
if (isset($_GET["luokka"])) {
  $luokka = $_GET["luokka"];
  if (checkClassID($luokka)) {
    setcookie("lukluokka", $luokka, time() + 365*24*60*60); // expire in a year
  }
} else
if (isset($_COOKIE["lukluokka"])) {
  $luokka = $_COOKIE["lukluokka"];
  checkClassID($luokka);
} else {
  errorMsg("Luokkaa ei asetettu, käytetään vakioarvoa <b>".$luokkaDefault."</b>.");
  $luokka = $luokkaDefault;
}


// Classroom view is not implemented yet: report that and stop.
if (isset($_GET["tila"])) {
  echo "Luokkatilan näyttömoodi ei vielä tuettu.<br />\n";
  exit;
}


// Returns an HTML link for a course ID, or the escaped ID if nothing is
// known about the course.
//
// Course data is looked up in the global $cache. On a miss the course page
// at $infoURI.$id is fetched and scanned for the name and credit points;
// a successful match is stored in $cache and $cacheDirty is set so that the
// cache file gets rewritten at the end of the request.
function matchCourse($id)
{
  global $cache, $infoURI, $cacheDirty;

  // Check if course exists in cache
  if (!isset($cache[$id])) {
    // Not cached, try to fetch data
    $data = @file_get_contents($infoURI.$id);
    if ($data !== FALSE) {
      if (preg_match("#<td class=\"smallheadercell\"><strong>(.+?)\s+(\d+)\s*op\s*</strong></td>#", $data, $m)) {
        // Add data to cache; only now is there something new to write out
        $cache[$id] = array("desc" => $m[1], "op" => intval($m[2]));
        $cacheDirty = TRUE;
      }
    }
  }

  if (isset($cache[$id]))
    return "<a target=\"_blank\" title=\"".htmlentities($id." - ".$cache[$id]["op"]." op").
      "\" href=\"".htmlentities($infoURI.$id)."\">".htmlentities($cache[$id]["desc"])."</a>";
  else
    return htmlentities($id);
}


// Builds a link to the classroom view from a regex match array:
// $matches[1] is used as the room ID, $matches[2] is appended after the link.
// NOTE(review): not called anywhere in this file, and both values are
// emitted without escaping - confirm the caller's input before using it.
function matchClass($matches)
{
  global $baseURI;
  return "<b><a href=\"".$baseURI."tila=".$matches[1]."\">".$matches[1]."</a></b> ".$matches[2];
}


// Global cache for course data
$cache = array();
$cacheDirty = FALSE;


// Try to read cachefile, if we can get file lock on it
$fp = @fopen($cacheFile, "rb");
if ($fp) {
  if (flock($fp, LOCK_SH)) {
    require($cacheFile);
    flock($fp, LOCK_UN);
  }
  fclose($fp);
}


// Defaults, so that a missing data file yields an empty table plus the
// queued error message instead of undefined-variable notices below.
$classInfo = array("info" => array(), "general" => array(), "maxhours" => 0);
$classDefs = array();
$classHourTable = array();

if (file_exists($luokka.".data")) {
  require($luokka.".data");
} else {
  errorMsg("Luokan ".htmlentities($luokka)." datatiedostoa ei löytynyt!");
}


// Sum up lesson hours, in total and per day. Note that the "grouped"
// counters accumulate the hours of entries whose "grouped" flag is NOT set.
$totalHours = 0;
$totalGrouped = 0;
$dayHours = array();
for ($day = 0; $day < $showDays; $day++)
  $dayHours[$day]["total"] = $dayHours[$day]["grouped"] = 0;

foreach ($classDefs as $id => $data) {
  $h = $data["hours"];

  $totalHours += $h;
  $dayHours[$data["day"]]["total"] += $h;

  if (!$data["grouped"]) {
    $totalGrouped += $h;
    $dayHours[$data["day"]]["grouped"] += $h;
  }
}

// Drop trailing days that have no lessons at all
for ($day = $showDays - 1; $day >= 0; $day--) {
  if ($dayHours[$day]["total"] == 0)
    $showDays--;
  else
    break;
}


// Page header; the class ID and the data file texts are plain text, so they
// are escaped before being placed into the markup.
$pageTitle = $luokka." / ".join("; ", $classInfo["info"]);
printPageHeader($pageTitle);
echo "<h1>".htmlentities($pageTitle)."</h1>\n".
  "<p>".htmlentities(join("; ", $classInfo["general"]))."</p>\n";

echo "<table class=\"timetable\">".
  " <tr>\n <th></th>\n";
for ($day = 0; $day < $showDays; $day++) {
  echo " <th class=\"days\">".$dayNames[$day]."</th>\n";
}
echo " </tr>\n";

// One table row per hour. A cell value of 0 means "nothing here", a positive
// value is a key into $classDefs, and -1 marks an empty cell that has already
// been covered by the rowspan of an empty cell above it.
for ($hour = 0; $hour < $classInfo["maxhours"]; $hour++) {
  echo " <tr>\n";
  echo " <th class=\"hours\">".getHour($hour)."</th>\n";
  for ($day = 0; $day < $showDays; $day++) {
    if (isset($classHourTable[$hour][$day])) {
      $h = $classHourTable[$hour][$day];

      if ($h < 1) {
        if ($h == 0) {
          // Merge the run of consecutive empty cells into one tall cell
          $n = 0;
          for ($i = $hour; $i < $classInfo["maxhours"]; $i++)
            if (isset($classHourTable[$i][$day]) && $classHourTable[$i][$day] == $h) {
              $classHourTable[$i][$day] = -1;
              $n++;
            } else
              break;
          echo " <td rowspan=\"".$n."\" class=\"clnothing\"></td>\n";
        }
      } else
      if (isset($classDefs[$h])) {
        // A lesson is printed once, at its first hour, spanning all its rows
        if (!isset($classDefs[$h]["done"])) {
          $classDefs[$h]["done"] = true;
          $i = $classDefs[$h];
          $d = $classDefs[$h]["data"];

          // Two course IDs side by side: second data item is a course ID too
          $isSplit = isset($d[1]) && preg_match("/^[A-Z]\d{6}$/", $d[1]);

          echo " <td rowspan=\"".$i["hours"].
            "\" class=\"".($isSplit || $i["grouped"] ? "clgrouped" : "clnormal")."\">";

          if ($isSplit) {
            echo "<table>".
              "<tr><td>".matchCourse($d[0])."</td><td>".matchCourse($d[1])."</td></tr>";
            for ($j = 2; $j < count($d); $j += 2)
              echo "<tr><td>".htmlentities($d[$j])."</td><td>".(isset($d[$j+1]) ? htmlentities($d[$j+1]) : "")."</td></tr>";
            echo "</table>";
          } else {
            echo matchCourse($d[0])."<br />";
            for ($j = 1; $j < count($d); $j++)
              echo htmlentities($d[$j])."<br />";
          }
          echo "</td>\n";
        }
      } else
        errorMsg("Internal error cell $hour / $day : hour id $h does not exist!");
    } else
      errorMsg("Internal error, cell $hour / $day does not exist.");
  }
  echo " </tr>\n";
}

// Summary row with the per-day hour counters
echo " <tr>\n <td>Tunteja (<b>ryhmä</b>)</td>\n";
for ($day = 0; $day < $showDays; $day++) {
  echo " <td>".$dayHours[$day]["total"]."h (<b>".$dayHours[$day]["grouped"]."h</b>)</td>\n";
}
echo " </tr>\n";

echo "</table>";


// Show error messages
if ($errorSet) {
  echo "<ul>\n";
  foreach ($errorMsgs as $msg)
    echo "<li>$msg</li>\n";
  echo "</ul>\n";
}

printPageFooter();


// Dump the course data cache, but only if it has changed
if ($cacheDirty) {
  // First try to open the existing file for update
  $fp = @fopen($cacheFile, "rb+");

  // If file didn't exist, try write mode
  if (!$fp)
    $fp = @fopen($cacheFile, "wb");

  if ($fp) {
    // Use locking to prevent concurrent access and dump data
    if (flock($fp, LOCK_EX)) {
      ftruncate($fp, 0);
      fwrite($fp, "<?php\n\$cache = array(\n");
      foreach ($cache as $id => $data) {
        // The description comes from a remote page and this file is later
        // executed via require(): var_export() emits a single-quoted PHP
        // literal, in which "$" cannot start variable interpolation.
        fwrite($fp, " ".var_export((string) $id, TRUE)." => array(\"desc\" => ".
          var_export((string) $data["desc"], TRUE).", \"op\" => ".intval($data["op"])."),\n");
      }
      fwrite($fp, ");\n?>");
      fflush($fp);
      flock($fp, LOCK_UN);
    }
    fclose($fp);
  }
}

?>
#!/usr/bin/perl -w
#
# Fetch and parse HTML format class timetable into more sane formats
# (C) Copyright 2010-2010 Matti Hämäläinen <ccr@tnsp.org>
#
# Reads a timetable HTML page from a file or an http(s) URI, builds a simple
# node tree out of it, extracts the lesson cells of the first table in the
# document body and prints the result as a PHP include file, as XML or as a
# plain number grid.
#
use strict;
use Data::Dumper;
use LWP::UserAgent;
use HTML::Entities;

my $userAgent = "Lukkari/0.7";


# Percent-encodes a string for use in a URL query.
# Letters, digits and "_" are kept, space becomes "+", and every other
# character is written as "%XX" with exactly two uppercase hex digits.
sub urlencode($)
{
  my $value = $_[0];
  $value =~ s/([^a-zA-Z_0-9 ])/sprintf("%%%02X", unpack("C", $1))/eg;
  $value =~ tr/ /+/;
  return $value;
}


# Returns a copy of the string without leading and trailing whitespace.
# An undefined argument is passed through as undef.
sub str_trim($)
{
  my $str = $_[0];
  if (defined($str)) {
    $str =~ s/^\s*//;
    $str =~ s/\s*$//;
  }
  return $str;
}


# Removes and returns the first token of the referenced token array as is.
sub pop_token_a($)
{
  my $tokens = $_[0];
  return shift(@$tokens);
}


# Removes and returns the first token of the referenced token array, trimmed.
# Returns undef when the array is empty.
sub pop_token($)
{
  return str_trim(pop_token_a($_[0]));
}


# Reads a double-quoted string from the token array: the next token must
# start with '"', and tokens are concatenated until one ends with '"'.
# Returns the string without the quotes, or undef if there is no token, no
# opening quote or no closing quote.
sub parse_html_str($)
{
  my $tokens = $_[0];
  my $token = pop_token($tokens);
  my $str = "";

  # Nothing left to read
  return undef unless defined($token);

  return undef unless (substr($token, 0, 1) eq '"');
  $token = substr($token, 1);

  while (defined($token)) {
    if (substr($token, -1) eq '"') {
      $str .= substr($token, 0, -1);
      return $str;
    } else {
      $str .= $token;
    }
    $token = shift(@$tokens);
  }
  return undef;
}


sub parse_html_tree($$);

# Consumes tokens and appends child nodes to $$tree{"nodes"} until a closing
# tag or the end of the token list is reached; returns $tree.
#
# Element nodes are { name, args, nodes }, text nodes are
# { name => "text", args => "", text }. Tags are lowercased as a whole,
# attributes included. ANY closing tag ends the current element; the tag
# name is not compared with the element that is being closed.
sub parse_html_tree($$)
{
  my ($tokens, $tree) = @_;

  # Test for definedness, not truth: a text token "0" is valid content and
  # must not end the parse.
  while (defined(my $token = pop_token($tokens))) {
    if ($token =~ /^<[!\/]?[a-zA-Z]+/) {
      $token = lc($token);
      if ($token =~ /^<\!.*>$/) {
        # Ignore comments etc.
      } elsif ($token =~ /^<([a-z]+)(.*)>$/) {
        my ($name, $args) = ($1, $2);
        if ($name eq "meta" || $name eq "img") {
          # These never get a closing tag: produce no node and keep
          # reading siblings into the current element.
          next;
        } else {
          my $tmp = { "name" => $name, "args" => str_trim($args) };
          parse_html_tree($tokens, $tmp);
          push(@{$$tree{"nodes"}}, $tmp);
        }
      } elsif ($token =~ /^<\/([a-z]+)>$/) {
        return $tree;
      } else {
        die("HORROR TERROR ELITE: $token\n");
      }
    } else {
      $token = str_trim(decode_entities($token));
      push(@{$$tree{"nodes"}}, { "name" => "text", "args" => "", "text" => $token }) if length($token) > 0;
    }
  }

  return $tree;
}


# Parses an HTML string into a node tree and returns the nameless root node.
# Returns undef for an undefined argument.
sub parse_html($)
{
  return undef unless defined($_[0]);
  my $str = $_[0];
  my $res = { "name" => "", "args" => "" };
  $str =~ tr/\r/ /;
  $str =~ tr/\n/ /;
  # Split into tags and the text between them, dropping whitespace-only parts
  my @tokens = grep { !($_ =~ /^\s*$/) } split(/(<\/?[a-zA-Z]+.*?>)/, $str);
  parse_html_tree(\@tokens, $res);
  return $res;
}


# Fetches a URI with HTTP GET (10 second timeout).
# Returns the response content, or undef after printing the status to STDERR.
sub http_fetch($)
{
  my $agent = LWP::UserAgent->new;
  $agent->agent($userAgent);
  $agent->timeout(10);

  my $req = HTTP::Request->new(GET => $_[0]);
  $req->user_agent($userAgent);
  my $res = $agent->request($req);

  if ($res->is_success) {
    return $res->content;
  } else {
    print STDERR "HTTP request failed: [".$res->code."] ".$res->message."\n";
    return undef;
  }
}


sub html_find_node($$$);

# Depth-first search for the first node called $name.
#
# $node is a node hash or an array of nodes. If $args is not "", it is used
# as a regular expression that the node's "args" string has to match.
# A node with the right name but non-matching args ends the search of that
# branch with undef; its children are not examined.
sub html_find_node($$$)
{
  my ($node, $name, $args) = @_;

  if (defined($node)) {
    if (ref($node) eq "ARRAY") {
      foreach my $n (@$node) {
        my $tmp = html_find_node($n, $name, $args);
        # Must do it like this, in order not to break the loop
        return $tmp if defined($tmp);
      }
    } elsif (ref($node) eq "HASH") {
      if (defined($$node{"name"})) {
        if ($$node{"name"} eq $name) {
          if ($args ne "") {
            if (defined($$node{"args"}) && $$node{"args"} =~ /$args/) {
            } else {
              return undef;
            }
          }
          return $node;
        } else {
          return html_find_node($$node{"nodes"}, $name, $args);
        }
      }
    }
  }
  return undef;
}


# Finds the first node with the given name, regardless of its arguments.
sub fnode($$)
{
  return html_find_node($_[0], $_[1], "");
}


# Finds the first node with the given name whose arguments match a pattern.
sub fnodea($$$)
{
  return html_find_node($_[0], $_[1], $_[2]);
}


# Escapes a string for use inside a double-quoted PHP string literal.
# Backslash, double quote and "$" are prefixed with a backslash. A single
# quote is left alone: PHP does not treat \' as an escape in double quotes,
# so escaping it would put a literal backslash into the data.
sub escape($)
{
  my $s = $_[0];
  $s =~ s/([\\"\$])/\\$1/g;
  return $s;
}


sub html_collapse($$);

# Flattens a node tree back into a string.
# With a true $strip only the text is returned; otherwise every element is
# wrapped in "<name>" and "</name>" (attributes are not reproduced).
sub html_collapse($$)
{
  my ($node, $strip) = @_;
  my $str = "";

  if ($$node{"name"} eq "text") {
    $str .= $$node{"text"};
  } else {
    $str .= "<".$$node{"name"}.">" unless ($strip);
    foreach my $n (@{$$node{"nodes"}}) {
      $str .= html_collapse($n, $strip);
    }
    $str .= "</".$$node{"name"}.">" unless ($strip);
  }

  return $str;
}


###
### Main program
###
my $modes = "simple|php|xml";
my $opt_mode = "php";
my $opt_dump = 0;
my $opt_filename;
my $opt_outfile;

while (defined(my $arg = shift)) {
  if (substr($arg, 0, 1) eq "-") {
    if ($arg =~ /^-($modes)$/o) {
      $opt_mode = $1;
    }
    elsif ($arg eq "-dump") {
      $opt_dump = 1;
    }
    elsif ($arg eq "-o") {
      $opt_outfile = shift or die("Output filename option -o requires an argument.\n");
    } else {
      die("Invalid option '$arg'.\n");
    }
  } else {
    $opt_filename = $arg;
  }
}

die("Usage: $0 [options] <filename|URI>

 -php            Output a PHP include file with data in arrays (default)
 -simple         Output simple tabled output for easy parsing.
 -xml            Output XML.

 -o <filename>   Set output filename. Default is to use stdout.

 -dump           Dump HTML tree to stdout and quit.

") unless defined($opt_filename);


my $data;
if ($opt_filename =~ /^(http|https):/) {
  $data = http_fetch($opt_filename) or die("Could not fetch: $opt_filename\n");
} else {
  open(my $fh, '<', $opt_filename) or die("Error opening '$opt_filename': $!\n");
  $data = do { local $/; <$fh> };
  close($fh);
}

die("No data in input.\n") unless (defined($data) && $data ne "");


# Filter out certain unneeded elements
$data =~ s/<font[^>]*>//ig;
$data =~ s/<\/font>//ig;
$data =~ s/<\/?center>//ig;
$data =~ s/<br>//ig;
# Turn non-breaking space entities into plain spaces before parsing, so that
# they are handled as ordinary whitespace when tokens are trimmed.
$data =~ s/&nbsp;/ /ig;

### Get some general information
my $otree = parse_html($data);
if ($opt_dump) {
  print Dumper(fnode($otree, "html"));
  exit;
}

# Direct children of <body>: text nodes go to "info", <b> elements to "data"
my %class = ();
my $body = fnode($otree, "body");
if (defined($body) && defined($$body{"nodes"})) {
  foreach my $n (@{$$body{"nodes"}}) {
    if ($$n{"name"} eq "text") {
      push(@{$class{"info"}}, $$n{"text"});
    }
    elsif ($$n{"name"} eq "b") {
      push(@{$class{"data"}}, $n);
    }
  }
}

# Filter out some more, for easier tree access during table parsing
$data =~ s/<\/?b>//ig;
my $tree = parse_html($data);
my $node = fnode(fnode($tree, "body"), "table");
die("No table element found in document. Perhaps the format has changed? :(\n") unless defined($node);


### Parse through the HTML document node tree to find the data we need
my $id = 0;
my $q = $$node{"nodes"} || [];    # table rows; empty list if the table has none
my $tunnit = {};                  # lesson id => lesson definition
my $taulu = {};                   # row => day column => lesson id (0 = empty)
my $maxdays = 6;
my $maxhours = 0;

# Skip zero this way
for (my $i = 1; $i < scalar(@{$q}); $i++) {
  my $d = $$q[$i]{"nodes"};
  if (defined($d)) {
    foreach my $n (@{$d}) {
      my $cells = $$n{"nodes"}[0]{"nodes"};
      if (defined($cells) && $$n{"args"} =~ /colspan=6\s+rowspan=(\d+)/) {
        # The page uses two table rows per hour
        my $tuntia = $1 / 2;
        my $items = [];
        my $grouped = 0;
        foreach my $h (@{$cells}) {
          if (defined($$h{"nodes"})) {
            foreach my $b (@{$$h{"nodes"}}) {
              if (defined($$b{"nodes"})) {
                my $text = $$b{"nodes"}[0]{"text"};
                # First child is not a text node: nothing to record
                next unless defined($text);
                $text =~ s/\.$//;

                $grouped = 1 if ($text =~ /vuorov/);

                push(@$items, $text);
              }
            }
          }
        }

        # Cells with content get a new lesson id, empty cells get id 0
        my $tid;
        if (scalar(@$items) > 0) {
          $id++;
          $tid = $id;
        } else {
          $tid = 0;
        }

        # The cell belongs to the first day column that is still free on
        # this row (column 0 if none is free)
        my $tpd = 0;
        for (my $x = 0; $x < $maxdays; $x++) {
          if (!defined($$taulu{$maxhours}{$x})) {
            $tpd = $x;
            last;
          }
        }
        for (my $t = 0; $t < $tuntia; $t++) {
          $$taulu{$maxhours + $t}{$tpd} = $tid;
        }

        if (scalar(@$items) > 0) {
          # Grouped, if there is another class ID in second slot
          $grouped = 1 if (defined($$items[1]) && $$items[1] =~ /^[A-Z]\d{6}$/);
          $$tunnit{$id} = { "grouped" => $grouped, "day" => $tpd, "start" => $maxhours, "hours" => $tuntia, "data" => $items };
        }
      }
    }
    $maxhours++;
  }
}


### Open output file, if specified
if (defined($opt_outfile)) {
  open(STDOUT, '>', $opt_outfile) or die("Could not open output file '$opt_outfile': $!\n");
}


### Output data in desired format
if ($opt_mode eq "php") {
  print "<?php\n".
    "\$classInfo = array(\n".
    " \"general\" => array(".join(", ", map { "\"".escape($_)."\""; } @{$class{"info"}})."),\n".
    " \"info\" => array(".join(", ", map { "\"".escape(html_collapse($_, 1))."\""; } @{$class{"data"}})."),\n".
    " \"info_tags\" => array(".join(", ", map { "\"".escape(html_collapse($_, 0))."\""; } @{$class{"data"}})."),\n".
    " \"maxdays\" => $maxdays,\n".
    " \"maxhours\" => $maxhours,\n".
    ");\n\n";

  print "\$classDefs = array(\n";
  foreach my $id (sort { $a <=> $b } keys %{$tunnit}) {
    print " $id => array(";
    # Sorted keys keep the generated file identical between runs
    foreach my $key (sort keys %{$$tunnit{$id}}) {
      my $value = $$tunnit{$id}{$key};
      print "\"$key\" => ";
      if (ref($value) eq "ARRAY") {
        print "array(".join(", ", map { "\"".escape($_)."\""; } @$value).")";
      }
      elsif ($value =~ /^\d+$/) {
        print $value;
      } else {
        print "\"".escape($value)."\"";
      }
      print ", ";
    }
    print "),\n";
  }

  print ");\n".
    "\n".
    "\$classHourTable = array(\n";
  for (my $y = 0; $y < $maxhours; $y++) {
    my $str = "";
    for (my $x = 0; $x < $maxdays; $x++) {
      $str .= ", " unless ($str eq "");
      # Cells that were never filled are written as 0 (empty)
      $str .= sprintf "%3d", defined($$taulu{$y}{$x}) ? $$taulu{$y}{$x} : 0;
    }
    print " array(".$str."),\n";
  }
  print ");\n?>\n";
}

elsif ($opt_mode eq "xml") {
  print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n".
    "<timetable>\n".
    " <class>\n".
    " <general>".join("", map { "<node>".encode_entities($_)."</node>"; } @{$class{"info"}})."</general>\n".
    " <info>".join("", map { "<node>".encode_entities(html_collapse($_, 1))."</node>"; } @{$class{"data"}})."</info>\n".
    " <maxdays>$maxdays</maxdays>\n".
    " <maxhours>$maxhours</maxhours>\n".
    " </class>\n";


  print "</timetable>\n";
}



elsif ($opt_mode eq "simple") {
  for (my $y = 0; $y < $maxhours; $y++) {
    for (my $x = 0; $x < $maxdays; $x++) {
      # Cells that were never filled are written as 0 (empty)
      printf "%3d ", defined($$taulu{$y}{$x}) ? $$taulu{$y}{$x} : 0;
    }
    print "\n";
  }
}

# Buffered write errors only show up when the handle is closed
close(STDOUT) or die("Error writing output: $!\n");
/*
 * Timetable styles (luk.css).
 */

/* Base font size of the timetable */
table.timetable {
    font-size: 10pt;
}

/* Links directly inside table cells: white with a dark drop shadow, no underline */
td > a,
td > a:visited,
td > a:active {
    color: #fff;
    text-shadow: 2px 2px 2px #000;
    text-decoration: none;
}

/* Highlight a link while hovering */
td > a:hover {
    color: yellow;
}

/* Keep the hour label on one line */
th.hours {
    white-space: nowrap;
}

/* Tables nested inside the timetable fill the whole cell */
table.timetable * table {
    height: 100%;
    width: 100%;
}

/* Grouped lesson cells, including cells nested inside them */
td.clgrouped,
td.clgrouped * td {
    background: #454;
}

/* Empty timetable cells */
td.clnothing {
    background: #222;
}

/* Normal lesson cells: no styling of their own */
td.clnormal {
}