comparison parsedata.pl @ 170:34ce8339b956

Cosmetic reformatting.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 24 Aug 2015 11:00:13 +0300
parents 04c4f1a95dbd
children eaac0a84a7d2
comparison
equal deleted inserted replaced
169:04c4f1a95dbd 170:34ce8339b956
1 #!/usr/bin/perl -w 1 #!/usr/bin/perl -w
2 # 2 #
3 # Fetch and parse HTML format class timetable into more sane formats 3 # Fetch and parse HTML format class timetable into more sane formats
4 # (C) Copyright 2010-2012 Matti Hämäläinen <ccr@tnsp.org> 4 # (C) Copyright 2010-2015 Matti Hämäläinen <ccr@tnsp.org>
5 # 5 #
6 use strict; 6 use strict;
7 use Data::Dumper; 7 use Data::Dumper;
8 use HTML::Entities; 8 use HTML::Entities;
9 9
28 28
29 29
30 sub str_trim($) 30 sub str_trim($)
31 { 31 {
32 my $str = $_[0]; 32 my $str = $_[0];
33 if (defined($str)) { 33 if (defined($str))
34 {
34 $str =~ s/^\s*//; 35 $str =~ s/^\s*//;
35 $str =~ s/\s*$//; 36 $str =~ s/\s*$//;
36 } 37 }
37 return $str; 38 return $str;
38 } 39 }
59 $token =~ s/^\s*//; 60 $token =~ s/^\s*//;
60 61
61 return undef unless (substr($token, 0, 1) eq '"'); 62 return undef unless (substr($token, 0, 1) eq '"');
62 $token = substr($token, 1); 63 $token = substr($token, 1);
63 64
64 while (defined($token)) { 65 while (defined($token))
66 {
65 my $tmp = $token; 67 my $tmp = $token;
66 $tmp =~ s/\s*$//; 68 $tmp =~ s/\s*$//;
67 if (substr($tmp, -1) eq '"') { 69 if (substr($tmp, -1) eq '"')
70 {
68 $str .= substr($tmp, 0, -1); 71 $str .= substr($tmp, 0, -1);
69 return $str; 72 return $str;
70 } else { 73 }
74 else
75 {
71 $str .= $token; 76 $str .= $token;
72 } 77 }
73 $token = shift(@$tokens); 78 $token = shift(@$tokens);
74 } 79 }
75 return undef; 80 return undef;
80 85
81 sub parse_html_tree($$) 86 sub parse_html_tree($$)
82 { 87 {
83 my ($tokens, $tree) = @_; 88 my ($tokens, $tree) = @_;
84 89
85 while (my $token = pop_token($tokens)) { 90 while (my $token = pop_token($tokens))
86 if ($token =~ /^<[!\/]?[a-zA-Z]+/) { 91 {
92 if ($token =~ /^<[!\/]?[a-zA-Z]+/)
93 {
87 $token = lc($token); 94 $token = lc($token);
88 if ($token =~ /^<\!.*>$/) { 95 if ($token =~ /^<\!.*>$/)
96 {
89 # Ignore comments etc. 97 # Ignore comments etc.
90 } elsif ($token =~ /^<([a-z]+)(.*)>$/) { 98 }
99 elsif ($token =~ /^<([a-z]+)(.*)>$/)
100 {
91 my ($name, $args) = ($1, $2); 101 my ($name, $args) = ($1, $2);
92 if ($name eq "meta" || $name eq "img") { 102 if ($name eq "meta" || $name eq "img")
103 {
93 my $tmp = {}; 104 my $tmp = {};
94 parse_html_tree($tokens, $tree); 105 parse_html_tree($tokens, $tree);
95 return $tree; 106 return $tree;
96 } else { 107 }
108 else
109 {
97 my $tmp = { "name" => $name, "args" => str_trim($args) }; 110 my $tmp = { "name" => $name, "args" => str_trim($args) };
98 parse_html_tree($tokens, $tmp); 111 parse_html_tree($tokens, $tmp);
99 push(@{$$tree{"nodes"}}, $tmp); 112 push(@{$$tree{"nodes"}}, $tmp);
100 } 113 }
101 } elsif ($token =~ /^<\/([a-z]+)>$/) { 114 }
115 elsif ($token =~ /^<\/([a-z]+)>$/)
116 {
102 return $tree; 117 return $tree;
103 } else { 118 }
119 else
120 {
104 die("HORROR TERROR ELITE: $token\n"); 121 die("HORROR TERROR ELITE: $token\n");
105 } 122 }
106 } else { 123 }
124 else
125 {
107 $token = str_trim(decode_entities($token)); 126 $token = str_trim(decode_entities($token));
108 push(@{$$tree{"nodes"}}, { "name" => "text", "args" => "", "text" => $token }) if length($token) > 0; 127 push(@{$$tree{"nodes"}}, { "name" => "text", "args" => "", "text" => $token }) if length($token) > 0;
109 } 128 }
110 } 129 }
111 130
129 148
130 sub html_find_node($$$) 149 sub html_find_node($$$)
131 { 150 {
132 my ($node, $name, $args) = @_; 151 my ($node, $name, $args) = @_;
133 152
134 if (defined($node)) { 153 if (defined($node))
135 if (ref($node) eq "ARRAY") { 154 {
136 foreach my $n (@$node) { 155 if (ref($node) eq "ARRAY")
156 {
157 foreach my $n (@$node)
158 {
137 my $tmp = html_find_node($n, $name, $args); 159 my $tmp = html_find_node($n, $name, $args);
138 # Must do it like this, in order not to break the loop 160 # Must do it like this, in order not to break the loop
139 return $tmp if defined($tmp); 161 return $tmp if defined($tmp);
140 } 162 }
141 } elsif (ref($node) eq "HASH") { 163 }
142 if (defined($$node{"name"})) { 164 elsif (ref($node) eq "HASH")
143 if ($$node{"name"} eq $name) { 165 {
166 if (defined($$node{"name"}))
167 {
168 if ($$node{"name"} eq $name)
169 {
144 if ($args ne "") { 170 if ($args ne "") {
145 if (defined($$node{"args"}) && $$node{"args"} =~ /$args/) { 171 if (defined($$node{"args"}) && $$node{"args"} =~ /$args/)
146 } else { 172 {
173 }
174 else
175 {
147 return html_find_node($$node{"nodes"}, $name, $args); 176 return html_find_node($$node{"nodes"}, $name, $args);
148 } 177 }
149 } 178 }
150 return $node; 179 return $node;
151 } else { 180 }
181 else
182 {
152 return html_find_node($$node{"nodes"}, $name, $args); 183 return html_find_node($$node{"nodes"}, $name, $args);
153 } 184 }
154 } 185 }
155 } 186 }
156 } 187 }
183 sub html_collapse($$) 214 sub html_collapse($$)
184 { 215 {
185 my ($node, $strip) = @_; 216 my ($node, $strip) = @_;
186 my $str = ""; 217 my $str = "";
187 218
188 if ($$node{"name"} eq "text") { 219 if ($$node{"name"} eq "text")
220 {
189 $str .= $$node{"text"}; 221 $str .= $$node{"text"};
190 } else { 222 }
223 else
224 {
191 $str .= "<".$$node{"name"}.">" unless ($strip); 225 $str .= "<".$$node{"name"}.">" unless ($strip);
192 foreach my $n (@{$$node{"nodes"}}) { 226 foreach my $n (@{$$node{"nodes"}})
227 {
193 $str .= html_collapse($n, $strip); 228 $str .= html_collapse($n, $strip);
194 } 229 }
195 $str .= "</".$$node{"name"}.">" unless ($strip); 230 $str .= "</".$$node{"name"}.">" unless ($strip);
196 } 231 }
197 232
206 my $cdata = []; 241 my $cdata = [];
207 my $cturns = 0; 242 my $cturns = 0;
208 my $cgrouped = 0; 243 my $cgrouped = 0;
209 244
210 # Pull in data for the class/hour cell 245 # Pull in data for the class/hour cell
211 foreach my $h (@{$l}) { 246 foreach my $h (@{$l})
212 if (defined($$h{"nodes"})) { 247 {
213 foreach my $b (@{$$h{"nodes"}}) { 248 if (defined($$h{"nodes"}))
214 if (defined($$b{"nodes"})) { 249 {
250 foreach my $b (@{$$h{"nodes"}})
251 {
252 if (defined($$b{"nodes"}))
253 {
215 my $text = $$b{"nodes"}[0]{"text"}; 254 my $text = $$b{"nodes"}[0]{"text"};
216 $text =~ s/\.$//; 255 $text =~ s/\.$//;
217 256
218 if ($text =~ /^vuorov/i) 257 if ($text =~ /^vuorov/i)
219 { 258 {
237 $tid = 0; 276 $tid = 0;
238 } 277 }
239 278
240 # Determine current day 279 # Determine current day
241 my $cday = 0; 280 my $cday = 0;
242 for (my $x = 0; $x < 7; $x++) { 281 for (my $x = 0; $x < 7; $x++)
243 if (!defined($$hourFillTable{$lastHour}{$x})) { 282 {
283 if (!defined($$hourFillTable{$lastHour}{$x}))
284 {
244 $cday = $x; 285 $cday = $x;
245 last; 286 last;
246 } 287 }
247 } 288 }
248 for (my $t = 0; $t < $chours; $t++) { 289 for (my $t = 0; $t < $chours; $t++)
290 {
249 $$hourFillTable{$lastHour + $t}{$cday} = $tid; 291 $$hourFillTable{$lastHour + $t}{$cday} = $tid;
250 } 292 }
251 293
252 if ($tid) 294 if ($tid)
253 { 295 {