Mercurial > hg > lukkari
annotate parsedata.pl @ 181:9c3100ab29cc
Cosmetics.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 08 Sep 2015 10:48:48 +0300 |
parents | eaac0a84a7d2 |
children | 9f7eb4db99b4 |
rev | line source |
---|---|
1 | 1 #!/usr/bin/perl -w |
2 # | |
3 # Fetch and parse HTML format class timetable into more sane formats | |
170 | 4 # (C) Copyright 2010-2015 Matti Hämäläinen <ccr@tnsp.org> |
1 | 5 # |
6 use strict; | |
7 use Data::Dumper; | |
8 use HTML::Entities; | |
9 | |
162
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
10 ### |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
11 ### Some globals |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
12 ### |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
# Command line option state. $modes is the alternation of accepted output
# mode names; it is interpolated into the option-matching regex below.
my $modes = "php|xml";
my $opt_mode = "php";
my $opt_dump = 0;
my ($opt_filename, $opt_outfile);

# Parser state shared by the timetable parsing subs.
my $cid = 0;                     # running ID of the last non-empty class cell
my ($hourTimes, $hourDefs, $hourTable, $hourFillTable) = ([], {}, {}, {});
my ($maxDays, $firstHour, $lastHour, $totalHours) = (0, 0, 0, 0);
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
28 |
1 | 29 |
# Strip leading and trailing whitespace from a string.
# Returns the trimmed copy; an undefined argument is passed through as undef.
sub str_trim($)
{
  my ($text) = @_;
  return $text unless defined($text);

  $text =~ s/^\s*//;
  $text =~ s/\s*$//;
  return $text;
}
40 | |
41 | |
# Remove and return the first token of the referenced token list as-is
# (no trimming). Returns undef when the list is empty.
sub pop_token_a($)
{
  my ($queue) = @_;
  return shift(@{$queue});
}
47 | |
48 | |
# Remove the first token from the referenced token list and return it with
# surrounding whitespace trimmed. Returns undef when the list is empty.
sub pop_token($)
{
  my ($tokens) = @_;
  my $raw = pop_token_a($tokens);
  return str_trim($raw);
}
53 | |
54 | |
# Parse a double-quoted string that may span several tokens.
#
# $_[0] is a reference to the token list; consumed tokens are removed from it.
# The first token (trimmed) must start with a double quote. Tokens are then
# concatenated until one ends (ignoring trailing whitespace) with a double
# quote, and the text between the quotes is returned.
#
# Returns undef when the token list is empty, when the first token does not
# start with a quote, or when the tokens run out before a closing quote.
sub parse_html_str($)
{
  my $tokens = $_[0];
  my $token = pop_token($tokens);

  # pop_token() already trimmed the token; it is undef on an empty list.
  return undef unless (defined($token) && substr($token, 0, 1) eq '"');
  $token = substr($token, 1);

  my $str = "";
  while (defined($token))
  {
    (my $tmp = $token) =~ s/\s*$//;
    # Regex test instead of substr(..., -1) so that an empty remainder
    # (e.g. the token was a lone quote) does not index outside the string.
    if ($tmp =~ /"$/)
    {
      $str .= substr($tmp, 0, -1);
      return $str;
    }

    $str .= $token;
    $token = shift(@$tokens);
  }

  return undef;
}
82 | |
83 | |
sub parse_html_tree($$);

# Recursively build a simple node tree from a flat list of HTML tokens.
#
# $tokens  reference to the token list; tokens are consumed from the front.
# $tree    hash reference of the node currently being filled; children are
#          appended to its "nodes" array.
#
# Element nodes are hashes with "name" and "args"; text nodes have the name
# "text" and carry the entity-decoded, trimmed content in "text". Parsing of
# the current node ends at the first closing tag (the tag name is not checked
# against the opening tag) or when the tokens run out. Returns $tree.
#
# Dies on a tag-like token that matches none of the recognized forms.
#
# NOTE(review): tag names are matched with [a-z]+ only, so a tag such as
# <h1> is parsed as name "h" with args "1", and its closing tag ends up in
# the die() below -- confirm the input never contains such tags.
sub parse_html_tree($$)
{
  my ($tokens, $tree) = @_;

  # Test for definedness, not truth: a text token of "0" is valid content
  # and must not terminate the parse.
  while (defined(my $token = pop_token($tokens)))
  {
    if ($token =~ /^<[!\/]?[a-zA-Z]+/)
    {
      # Note that this lowercases the attribute text as well
      $token = lc($token);
      if ($token =~ /^<\!.*>$/)
      {
        # Ignore comments etc.
      }
      elsif ($token =~ /^<([a-z]+)(.*)>$/)
      {
        my ($name, $args) = ($1, $2);

        # Elements without a closing tag produce no node; simply carry on
        # filling the current node.
        next if ($name eq "meta" || $name eq "img");

        my $tmp = { "name" => $name, "args" => str_trim($args) };
        parse_html_tree($tokens, $tmp);
        push(@{$$tree{"nodes"}}, $tmp);
      }
      elsif ($token =~ /^<\/([a-z]+)>$/)
      {
        return $tree;
      }
      else
      {
        die("HORROR TERROR ELITE: $token\n");
      }
    }
    else
    {
      $token = str_trim(decode_entities($token));
      push(@{$$tree{"nodes"}}, { "name" => "text", "args" => "", "text" => $token }) if length($token) > 0;
    }
  }

  return $tree;
}
133 | |
134 | |
# Parse an HTML document string into a node tree.
#
# Line breaks are replaced with spaces, the text is split into tag and text
# tokens (whitespace-only tokens are dropped) and the tokens are handed to
# parse_html_tree(). Returns the root node (a hash with empty "name" and
# "args"), or undef when the argument is undefined.
sub parse_html($)
{
  my ($html) = @_;
  return undef unless defined($html);

  # Both CR and LF become a single space each
  $html =~ tr/\r\n/ /;

  my @tokens = grep { $_ !~ /^\s*$/ } split(/(<\/?[a-zA-Z]+.*?>)/, $html);
  my $root = { "name" => "", "args" => "" };
  parse_html_tree(\@tokens, $root);
  return $root;
}
146 | |
sub html_find_node($$$);

# Depth-first search for the first node with the given name.
#
# $node  a node hash, an array reference of nodes, or undef
# $name  node name to look for
# $args  when not empty, a regular expression the node's "args" string
#        must also match
#
# A node whose name matches but whose args do not is not returned; the
# search continues among its children instead. Returns the matching node
# hash, or undef when nothing matches.
sub html_find_node($$$)
{
  my ($node, $name, $args) = @_;
  return undef unless defined($node);

  my $type = ref($node);
  if ($type eq "ARRAY")
  {
    foreach my $child (@$node)
    {
      my $found = html_find_node($child, $name, $args);
      # Return only on success, so that the remaining siblings get searched
      return $found if defined($found);
    }
    return undef;
  }

  return undef unless ($type eq "HASH" && defined($$node{"name"}));

  my $matches = ($$node{"name"} eq $name) &&
    ($args eq "" || (defined($$node{"args"}) && $$node{"args"} =~ /$args/));

  return $matches ? $node : html_find_node($$node{"nodes"}, $name, $args);
}
190 | |
191 | |
# Shorthand for html_find_node() without an attribute filter:
# find the first node called $_[1] in the tree $_[0].
sub fnode($$)
{
  my ($node, $name) = @_;
  return html_find_node($node, $name, "");
}
196 | |
197 | |
# Shorthand for html_find_node() with an attribute filter: find the first
# node called $_[1] in the tree $_[0] whose "args" match the regex $_[2].
sub fnodea($$$)
{
  my ($node, $name, $args) = @_;
  return html_find_node($node, $name, $args);
}
202 | |
203 | |
# Return a copy of the string with every single and double quote
# prefixed by a backslash.
sub escape($)
{
  my ($text) = @_;
  $text =~ s/(['"])/\\$1/g;
  return $text;
}
210 | |
211 | |
sub html_collapse($$);

# Flatten a node tree back into a string.
#
# $node   node hash to flatten
# $strip  when true, only the text content is produced; otherwise each
#         element is wrapped in <name>...</name> (attributes are not output)
#
# Returns the resulting string.
sub html_collapse($$)
{
  my ($node, $strip) = @_;

  # Text leaves contribute their content only
  return "".$$node{"text"} if ($$node{"name"} eq "text");

  my $tag = $$node{"name"};
  my @parts = ();
  push(@parts, "<".$tag.">") unless ($strip);
  foreach my $child (@{$$node{"nodes"}})
  {
    push(@parts, html_collapse($child, $strip));
  }
  push(@parts, "</".$tag.">") unless ($strip);

  return join("", @parts);
}
235 | |
236 | |
162
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
# Process one class/hour cell of the timetable and record it in the global
# parser state.
#
# $l        array reference of nodes making up the cell; for every element
#           that has "nodes", each child that itself has "nodes" contributes
#           the "text" of its first child (a trailing "." is removed)
# $rowspan  row span of the cell, counted in half-hour rows
#
# Updates $cid, $hourFillTable, $hourDefs, $hourTable, $maxDays and
# $totalHours, and reads $lastHour as the starting hour of the cell.
# Has no meaningful return value.
sub parse_hour_data($$)
{
  my ($l, $rowspan) = @_;
  my $chours = $rowspan / 2; # The table is actually in half cells
  my $cdata = [];
  my $cturns = 0;
  my $cgrouped = 0;

  # Pull in data for the class/hour cell
  foreach my $h (@{$l})
  {
    next unless defined($$h{"nodes"});
    foreach my $elem (@{$$h{"nodes"}})
    {
      next unless defined($$elem{"nodes"});

      my $text = $$elem{"nodes"}[0]{"text"};
      $text =~ s/\.$//;

      if ($text =~ /^vuorov/i)
      {
        # Marker text, not data: only sets the "turns" flag
        # (presumably "every other week" classes -- TODO confirm)
        $cturns = 1;
      }
      else
      {
        push(@$cdata, $text);
      }
    }
  }

  # Increased ID if there is data in this class/hour cell
  my $tid = 0;
  if (scalar(@$cdata) > 0)
  {
    $cid++;
    $tid = $cid;
  }

  # Determine current day: the first day column that is still unfilled
  # at the current hour (stays 0 if all seven are taken)
  my $cday = 0;
  for (my $x = 0; $x < 7; $x++)
  {
    if (!defined($$hourFillTable{$lastHour}{$x}))
    {
      $cday = $x;
      last;
    }
  }

  # Mark every hour the cell spans as occupied on that day
  for (my $t = 0; $t < $chours; $t++)
  {
    $$hourFillTable{$lastHour + $t}{$cday} = $tid;
  }

  if ($tid)
  {
    $maxDays = $cday + 1 if ($cday + 1 > $maxDays);

    # Grouped, if there is another class ID in second slot.
    # The slot does not exist when the cell has a single entry.
    $cgrouped = 1 if (defined($$cdata[1]) &&
      $$cdata[1] =~ /^([A-Z]\d{5,6}[A-Z]*|[A-Z0-9]{6,8})$/);

    my $data;
    if ($cgrouped)
    {
      # Entries alternate between the two groups: even indices belong to
      # the first group, odd indices to the second. Iterate over the element
      # count; length() of the reference would measure its stringification.
      my $cdata1 = [];
      my $cdata2 = [];
      for (my $i = 0; $i < scalar(@$cdata); $i += 2)
      {
        push(@$cdata1, $$cdata[$i]) if defined($$cdata[$i]);
        push(@$cdata2, $$cdata[$i+1]) if defined($$cdata[$i+1]);
      }
      $data = [ $cdata1, $cdata2 ];
    }
    else
    {
      $data = [ $cdata ];
    }

    $$hourDefs{$cid} = { "turns" => $cturns, "grouped" => $cgrouped, "start" => $lastHour, "hours" => $chours, "data" => $data };

    push(@{$$hourTable{$cday}}, $tid);
    $totalHours += $chours;
  }
}
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
320 |
162
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
# Convert the first "H:MM" style time found in the string into seconds
# since midnight. Returns undef when the string contains no such time.
sub parse_hour_header($)
{
  my ($hours, $minutes) = ($_[0] =~ /(\d+):(\d+)/);
  return undef unless defined($hours);

  my $seconds = (int($hours) * 60 + int($minutes)) * 60;
  return $seconds;
}
1 | 329 |
72
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
330 |
sub get_hour_data_struct($$);

# Serialize a nested data structure into the selected output format
# ($opt_mode is "php" or "xml").
#
# $tmp    array reference, hash reference or plain scalar
# $first  only used for scalars in XML mode: when true the value is output
#         bare, otherwise it is wrapped in <item>...</item>
#
# PHP mode: non-empty arrays become "array(a, b)", hashes become a
# comma-separated list of "key" => value pairs, all-digit scalars are output
# as-is and everything else is double-quoted.
# XML mode: non-empty arrays become <group>...</group>, hashes become
# <key>value</key> elements.
# Empty arrays produce an empty string. Hash keys are emitted in sorted
# order so that the output is reproducible between runs.
#
# NOTE(review): values are not escaped for either output format here --
# confirm the data cannot contain quotes or markup characters.
sub get_hour_data_struct($$)
{
  my ($tmp, $first) = @_;
  my $php = ($opt_mode eq "php");
  my $xml = ($opt_mode eq "xml");
  my @out = ();

  if (ref($tmp) eq "ARRAY")
  {
    my @str = ();
    foreach my $item (@{$tmp})
    {
      push(@str, get_hour_data_struct($item, 0));
    }
    if (scalar(@str) > 0)
    {
      push(@out, "array(".join(", ", @str).")") if ($php);
      push(@out, "<group>".join("", @str)."</group>") if ($xml);
    }
  }
  elsif (ref($tmp) eq "HASH")
  {
    foreach my $key (sort keys %{$tmp})
    {
      my $value = get_hour_data_struct($$tmp{$key}, 1);
      push(@out, "\"".$key."\" => ".$value) if ($php);
      push(@out, "<".$key.">".$value."</".$key.">") if ($xml);
    }
  }
  elsif ($php)
  {
    # Plain numbers go out bare, anything else as a quoted string
    push(@out, ($tmp =~ /^\d+$/) ? $tmp : "\"".$tmp."\"");
  }
  else
  {
    push(@out, $first ? $tmp : "<item>".$tmp."</item>");
  }

  return join(", ", @out) if ($php);
  return join("", @out);
}
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
391 |
162
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
392 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
393 ### |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
394 ### Main program |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
395 ### |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
396 |
# Parse the command line. Arguments starting with "-" are options; any other
# argument is taken as the input filename (the last one given wins).
# Dies on an unknown option or when -o is not followed by a filename.
while (defined(my $arg = shift))
{
  if (substr($arg, 0, 1) ne "-")
  {
    $opt_filename = $arg;
  }
  elsif ($arg =~ /^-($modes)$/o)
  {
    $opt_mode = $1;
  }
  elsif ($arg eq "-dump")
  {
    $opt_dump = 1;
  }
  elsif ($arg eq "-o")
  {
    # Check for a missing or empty argument explicitly, so that a
    # filename like "0" is not mistaken for a missing one.
    $opt_outfile = shift;
    die("Output filename option -o requires an argument.\n")
      unless (defined($opt_outfile) && $opt_outfile ne "");
  }
  else
  {
    die("Invalid option '$arg'.\n");
  }
}
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
423 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
424 die("Usage: $0 [options] <filename> |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
425 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
426 -php Output a PHP include file with data in arrays (default) |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
427 -xml Output a simple XML file. |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
428 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
429 -o <filename> Set output filename. Default is to use stdout. |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
430 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
431 -dump Dump HTML tree to stdout and quit. |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
432 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
433 ") unless defined($opt_filename); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
434 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
435 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
### Read the whole input file into memory (decoded from ISO-8859-1)
open(my $fh, '<:encoding(iso-8859-1)', $opt_filename)
    or die("Error opening '$opt_filename': $!\n");

# With the record separator locally undefined, one read returns everything
my $data = do { local $/; <$fh> };
close($fh);

# Nothing to parse if the read produced no content
if (!defined($data) || $data eq "")
{
    die("No data in input.\n");
}
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
442 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
443 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
# Filter out certain unneeded elements before the document is parsed:
# <font> and <center> wrappers and <br> tags are dropped entirely, and
# non-breaking space entities are replaced with ordinary spaces.
$data =~ s/<font[^>]*>//ig;
$data =~ s/<\/font>//ig;
$data =~ s/<\/?center>//ig;
$data =~ s/<br>//ig;
# The pattern must be the entity itself; substituting a plain space
# with a plain space would leave the data unchanged.
$data =~ s/&nbsp;/ /ig;
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
450 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
### Get some general information
my $otree = parse_html($data);

# In dump mode, print the "html" node of the parsed tree and stop here
if ($opt_dump)
{
    print Dumper(fnode($otree, "html"));
    exit;
}

# Walk the direct children of the "body" node: the text of "text" nodes
# is collected under "info", whole "b" nodes are collected under "data"
my %class = ();
my $body = fnode($otree, "body");
my $bodyNodes = defined($body) ? $body->{"nodes"} : undef;
if (defined($bodyNodes))
{
    foreach my $child (@{$bodyNodes})
    {
        my $kind = $child->{"name"};
        if ($kind eq "text")
        {
            push(@{$class{"info"}}, $child->{"text"});
        }
        elsif ($kind eq "b")
        {
            push(@{$class{"data"}}, $child);
        }
    }
}
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
475 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
# Strip bold tags as well, for easier tree access during table parsing
$data =~ s{</?b>}{}ig;
my $tree = parse_html($data);

# Attribute string handed to fnodea() to pick the timetable <table>
my $tableArgs = q{border="3" rules="all" cellpadding="1" cellspacing="1"};
my $node = fnodea(fnode($tree, "body"), "table", $tableArgs);
if (!defined($node))
{
    die("No table element found in document. Perhaps the format has changed? :(\n");
}
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
481 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
482 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
### Parse through the HTML document node tree to find the data we need

# The child at index zero is skipped, hence the index loop starting from 1
my $rows = $node->{"nodes"};
for (my $row = 1; $row < scalar(@{$rows}); $row++)
{
    my $cells = $rows->[$row]{"nodes"};
    next unless defined($cells);

    foreach my $cell (@{$cells})
    {
        # Child nodes of the cell's first child; cells without them are ignored
        my $inner = $cell->{"nodes"}[0]{"nodes"};
        next unless defined($inner);

        if ($cell->{"args"} =~ /colspan=12\s+rowspan=(\d+)/)
        {
            # Data cell: pass its content along with the captured rowspan
            parse_hour_data($inner, $1);
        }
        elsif ($cell->{"args"} =~ /rowspan=2\s+align/)
        {
            # Header cell: first two entries carry the start and end texts
            my $qstart = parse_hour_header($inner->[0]{"nodes"}[0]{"nodes"}[0]{"text"});
            my $qend = parse_hour_header($inner->[1]{"nodes"}[0]{"nodes"}[0]{"text"});
            push(@$hourTimes, {"start" => $qstart, "end" => $qend})
                if (defined($qstart) && defined($qend));
        }
    }

    # Each row that has child nodes advances the last hour counter
    $lastHour++;
}
515 | |
516 | |
8
a52a0bdb5ea1
Crop off empty hours (time slots) and days.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
### Go through hour table, crop empty hours from both ends

# True when some day column of the given hour row holds a non-zero fill value
my $hourInUse = sub {
    my ($hour) = @_;
    for my $day (0 .. $maxDays - 1)
    {
        my $fill = $$hourFillTable{$hour}{$day};
        return 1 if (defined($fill) && $fill != 0);
    }
    return 0;
};

# Bump the first hour once for every unused row before the first used one
for (my $hour = 0; $hour < $lastHour; $hour++)
{
    last if $hourInUse->($hour);
    $firstHour++;
}

# Pull the last hour back while the row just below it is unused
while ($lastHour > 0 && !$hourInUse->($lastHour - 1))
{
    $lastHour--;
}
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
533 |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
534 |
### Open output file, if specified; otherwise output stays on stdout
if (defined($opt_outfile))
{
    # Report the system error too, as the input open does, so the
    # user can tell why the file could not be opened
    open(STDOUT, '>', $opt_outfile)
        or die("Could not open output file '$opt_outfile': $!\n");
}

# Everything printed from here on is encoded as UTF-8
binmode STDOUT, ':encoding(utf-8)';
1 | 541 |
### Output data in desired format
if ($opt_mode eq "php")
{
    # PHP include file. The full "<?php" opening tag is used because the
    # short "<?" form only works when PHP's short_open_tag is enabled.
    print "<?php\n".
        "\$classInfo = array(\n".
        " \"general\" => array(".join(", ", map { "\"".escape($_)."\""; } @{$class{"info"}})."),\n".
        " \"info\" => array(".join(", ", map { "\"".escape(html_collapse($_, 1))."\""; } @{$class{"data"}})."),\n".
        " \"tags\" => array(".join(", ", map { "\"".escape(html_collapse($_, 0))."\""; } @{$class{"data"}})."),\n".
        " \"maxDays\" => $maxDays,\n".
        " \"firstHour\" => $firstHour,\n".
        " \"lastHour\" => $lastHour,\n".
        " \"totalHours\" => $totalHours\n".
        ");\n\n";

    # Start/end pairs collected from the table, in collection order
    print "\$classHourTimes = array(\n";
    foreach my $chour (@$hourTimes)
    {
        print " array(\"start\" => ".$chour->{"start"}.", \"end\" => ".$chour->{"end"}."),\n";
    }
    print ");\n\n";

    # Hour definitions, keyed by id in ascending numeric order
    print "\$classHourDefs = array(\n";
    foreach my $cid (sort { $a <=> $b } keys %{$hourDefs})
    {
        print " $cid => array(".get_hour_data_struct($hourDefs->{$cid}, 0)."),\n";
    }
    print ");\n\n";

    # Per-day entry lists; days without a table entry are left out
    print "\$classDayTable = array(\n";
    for (my $y = 0; $y < $maxDays; $y++)
    {
        if (defined($hourTable->{$y}))
        {
            print " $y => array(".join(", ", @{$hourTable->{$y}})."),\n";
        }
    }
    print ");\n?>\n";
}
elsif ($opt_mode eq "xml")
{
    # Simple XML document carrying the same data as the PHP output
    print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n".
        "<timetable>\n".
        " <class>\n".
        " <general>".join("", map { "<item>".$_."</item>"; } @{$class{"info"}})."</general>\n".
        " <info>".join("", map { "<item>".html_collapse($_, 1)."</item>"; } @{$class{"data"}})."</info>\n".
        " <maxdays>$maxDays</maxdays>\n".
        " <firsthour>$firstHour</firsthour>\n".
        " <lasthour>$lastHour</lasthour>\n".
        " <totalhours>$totalHours</totalhours>\n".
        " </class>\n";

    # Hours are numbered from zero in collection order
    print " <hours>\n";
    my $hourId = 0;
    foreach my $chour (@$hourTimes)
    {
        print " <hour id=\"".$hourId."\"><start>".$chour->{"start"}."</start><end>".$chour->{"end"}."</end></hour>\n";
        $hourId++;
    }
    print " </hours>\n\n";

    # Hour definitions, keyed by id in ascending numeric order
    print " <classes>\n";
    foreach my $classId (sort { $a <=> $b } keys %{$hourDefs})
    {
        print " <class id=\"$classId\">".get_hour_data_struct($hourDefs->{$classId}, 0)."</class>\n";
    }
    print " </classes>\n\n";

    # Per-day entry lists; days without a table entry are left out
    print " <days>\n";
    for (my $y = 0; $y < $maxDays; $y++)
    {
        if (defined($hourTable->{$y}))
        {
            print " <day id=\"$y\">".join("", map { "<class>".$_."</class>" } @{$hourTable->{$y}})."</day>\n";
        }
    }
    print " </days>\n";

    print "</timetable>\n";
}

# Buffered write errors (e.g. a full disk) only surface when the handle
# is closed, so the result of close must be checked
close(STDOUT) or die("Error writing output: $!\n");