Mercurial > hg > lukkari
annotate parsedata.pl @ 163:3790db4eb29b
Cosmetic.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Fri, 21 Aug 2015 03:39:58 +0300 |
parents | 70f432e3d1dc |
children | 2654466d2655 |
rev | line source |
---|---|
1 | 1 #!/usr/bin/perl -w |
2 # | |
3 # Fetch and parse HTML format class timetable into more sane formats | |
54 | 4 # (C) Copyright 2010-2012 Matti Hämäläinen <ccr@tnsp.org> |
1 | 5 # |
6 use strict; | |
7 use Data::Dumper; | |
8 use HTML::Entities; | |
9 | |
162
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
10 ### |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
11 ### Some globals |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
12 ### |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
my $modes = "php|xml";     # Regex alternation of the accepted output mode options (-php, -xml)
my $opt_mode = "php";      # Selected output mode; "php" unless a mode option is given
my $opt_dump = 0;          # Set to 1 by the -dump option
my $opt_filename;          # Input filename (the non-option command line argument)
my $opt_outfile;           # Output filename given with -o; undefined if not given

my $cid = 0;               # Running class ID, incremented by parse_hour_data() for each cell with data
my $hourTimes = [];        # NOTE(review): not used in the code visible here - presumably the hour header times
my $hourDefs = {};         # Class ID => hash with "turns", "grouped", "start", "hours" and "data"
my $hourTable = {};        # Day index => list of class IDs found for that day
my $hourFillTable = {};    # Hour => day index => class ID (0 for an empty cell) occupying that slot
my $maxDays = 0;           # Highest day index + 1 that has a class
my $firstHour = 0;         # NOTE(review): not used in the code visible here
my $lastHour = 0;          # Hour row that parse_hour_data() fills; presumably advanced by its caller
my $totalHours = 0;        # Sum of the hours of all cells that had data
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
28 |
1 | 29 |
# URL-encode a string.
#
# Letters, digits and underscore are passed through unchanged, a space
# becomes "+", and every other character is replaced by "%XX", the
# upper-case two-digit hexadecimal value of the character.
#
# Takes the string to encode and returns the encoded copy; the
# argument itself is not modified.
sub urlencode($)
{
    my $value = $_[0];
    # Always emit two hex digits: characters below 0x10 (tab, newline,
    # ...) must become e.g. "%0A"; a one-digit "%A" is not a valid escape.
    $value =~ s/([^a-zA-Z_0-9 ])/sprintf("%%%02X", unpack("C", $1))/eg;
    $value =~ tr/ /+/;
    return $value;
}
37 | |
38 | |
# Return a copy of the given string with leading and trailing
# whitespace removed. An undefined argument is handed back as it is.
sub str_trim($)
{
    my ($text) = @_;
    return $text unless defined($text);

    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}
48 | |
49 | |
# Remove the first token from the list referenced by the argument and
# return it untouched (no trimming). Yields undef once the list is empty.
sub pop_token_a($)
{
    my ($queue) = @_;
    my $head = shift(@{$queue});
    return $head;
}
55 | |
56 | |
# Remove the first token from the list referenced by the argument and
# return it with surrounding whitespace trimmed (undef on an empty list).
sub pop_token($)
{
    my ($queue) = @_;
    my $raw = pop_token_a($queue);
    return str_trim($raw);
}
61 | |
62 | |
# Parse a double-quoted string that may be spread over several tokens.
#
# Takes a reference to the token list and consumes tokens from its
# front. The first token must start with a double quote; tokens are
# then concatenated until one ends (trailing whitespace ignored) in a
# double quote.
#
# Returns the text between the quotes, or undef when the list is
# empty, the first token does not open a string, or the tokens run out
# before a closing quote is found.
sub parse_html_str($)
{
    my $tokens = $_[0];
    my $token = pop_token($tokens);
    my $str = "";

    # Nothing left to parse. pop_token() has already trimmed the token,
    # so no further leading-whitespace stripping is needed here.
    return undef unless defined($token);

    return undef unless (substr($token, 0, 1) eq '"');
    $token = substr($token, 1);

    while (defined($token)) {
        my $tmp = $token;
        $tmp =~ s/\s*$//;
        # A pattern match, unlike substr($tmp, -1), is also safe when
        # the token is empty.
        if ($tmp =~ /"\z/) {
            $str .= substr($tmp, 0, -1);
            return $str;
        } else {
            # Not the last piece: keep it verbatim, whitespace included
            $str .= $token;
        }
        $token = shift(@$tokens);
    }
    return undef;
}
86 | |
87 | |
sub parse_html_tree($$);

# Recursively build a node tree from a list of HTML tokens.
#
# Arguments:
#   $tokens - reference to the token list; tokens are consumed from
#             its front
#   $tree   - hash reference of the node that receives the children
#
# Each opening tag becomes a hash with "name" (lower-cased tag name)
# and "args" (the rest of the lower-cased tag, trimmed) that is pushed
# to the "nodes" list of $tree after its own children have been parsed.
# Text becomes a node named "text" with the entity-decoded, trimmed
# content in "text"; empty text is skipped. "meta" and "img" tags and
# tokens of the form "<!...>" are dropped. Any closing tag ends the
# current node; its name is not compared with the opening tag.
#
# Returns $tree. Dies on a tag-like token that matches none of the
# above forms.
sub parse_html_tree($$)
{
    my ($tokens, $tree) = @_;

    # Test for definedness, not truth: a text token of "0" is valid
    # content and must not end the loop.
    while (defined(my $token = pop_token($tokens))) {
        if ($token =~ /^<[!\/]?[a-zA-Z]+/) {
            $token = lc($token);
            if ($token =~ /^<\!.*>$/) {
                # Ignore comments etc.
            } elsif ($token =~ /^<([a-z]+)(.*)>$/) {
                my ($name, $args) = ($1, $2);

                # These tags are not stored and get no children of
                # their own; carry on with the following tokens as
                # siblings in the current node.
                next if ($name eq "meta" || $name eq "img");

                my $tmp = { "name" => $name, "args" => str_trim($args) };
                parse_html_tree($tokens, $tmp);
                push(@{$$tree{"nodes"}}, $tmp);
            } elsif ($token =~ /^<\/([a-z]+)>$/) {
                return $tree;
            } else {
                die("HORROR TERROR ELITE: $token\n");
            }
        } else {
            $token = str_trim(decode_entities($token));
            push(@{$$tree{"nodes"}}, { "name" => "text", "args" => "", "text" => $token }) if length($token) > 0;
        }
    }

    return $tree;
}
123 | |
124 | |
# Build a node tree from a string of HTML.
#
# Returns undef for an undefined argument, otherwise the root node: a
# hash with empty "name" and "args" that parse_html_tree() fills in.
sub parse_html($)
{
    my ($html) = @_;
    return undef unless defined($html);

    # Line breaks carry no meaning for the tokenizer; flatten them.
    $html =~ tr/\r\n/  /;

    # Split at the tags, keeping the tags themselves as tokens, and
    # discard the pieces that hold nothing but whitespace.
    my @pieces = split(/(<\/?[a-zA-Z]+.*?>)/, $html);
    my @tokens = grep { $_ !~ /^\s*$/ } @pieces;

    my $root = { "name" => "", "args" => "" };
    parse_html_tree(\@tokens, $root);
    return $root;
}
136 | |
sub html_find_node($$$);

# Depth-first search for the first node with a given name.
#
# Arguments:
#   $node - a node hash, or a reference to an array of nodes
#   $name - node name to look for
#   $args - pattern the node's "args" must match; "" accepts any node
#           of the right name
#
# A node whose name matches but whose "args" do not is not returned;
# the search continues among its children instead. Returns the node
# hash that was found, or undef.
sub html_find_node($$$)
{
    my ($node, $name, $args) = @_;
    return undef unless defined($node);

    my $kind = ref($node);

    if ($kind eq "ARRAY") {
        foreach my $child (@$node) {
            my $hit = html_find_node($child, $name, $args);
            # Only a real hit may end the loop early
            return $hit if defined($hit);
        }
        return undef;
    }

    return undef unless ($kind eq "HASH" && defined($$node{"name"}));

    if ($$node{"name"} eq $name &&
        ($args eq "" || (defined($$node{"args"}) && $$node{"args"} =~ /$args/))) {
        return $node;
    }

    # Wrong name or wrong attributes: descend into the children
    return html_find_node($$node{"nodes"}, $name, $args);
}
168 | |
169 | |
# Shorthand for html_find_node() without an attribute pattern: find
# the first node named $name below $node.
sub fnode($$)
{
    my ($node, $name) = @_;
    return html_find_node($node, $name, "");
}
174 | |
175 | |
# Shorthand for html_find_node(): find the first node named $name
# below $node whose attributes match the pattern $args.
sub fnodea($$$)
{
    my ($node, $name, $args) = @_;
    return html_find_node($node, $name, $args);
}
180 | |
181 | |
# Return a copy of the string in which every single and double quote
# is preceded by a backslash. The argument itself is left untouched.
sub escape($)
{
    (my $quoted = $_[0]) =~ s/(['"])/\\$1/g;
    return $quoted;
}
188 | |
189 | |
sub html_collapse($$);

# Turn a node tree back into a string.
#
# Arguments:
#   $node  - node hash to serialize
#   $strip - when true, only the text is returned; when false, every
#            non-text node is wrapped in "<name>" ... "</name>"
#            (attributes are not written out)
#
# Returns the resulting string.
sub html_collapse($$)
{
    my ($node, $strip) = @_;

    # A text node is just its content
    return "" . $$node{"text"} if ($$node{"name"} eq "text");

    my @parts = ();
    push(@parts, "<".$$node{"name"}.">") unless ($strip);
    foreach my $child (@{$$node{"nodes"}}) {
        push(@parts, html_collapse($child, $strip));
    }
    push(@parts, "</".$$node{"name"}.">") unless ($strip);

    return join("", @parts);
}
209 | |
210 | |
162
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
# Record one timetable cell in the global hour tables.
#
# Arguments:
#   $l       - reference to an array of HTML nodes that make up the cell
#   $rowspan - rowspan of the cell; the table is laid out in half
#              cells, so the cell lasts $rowspan / 2 hours
#
# For every child (with children of its own) of every node in $l, the
# "text" of its first child is collected with a trailing "." removed.
# A text starting with "vuorov" (any case) is not stored; it sets the
# "turns" flag of the class instead.
#
# Globals updated:
#   $cid           - incremented when the cell holds any data
#   $hourFillTable - every hour slot the cell covers, starting at
#                    $lastHour, is set to the class ID (0 if empty)
#   $maxDays, $hourDefs, $hourTable, $totalHours - only when the cell
#                    holds data
sub parse_hour_data($$)
{
    my ($l, $rowspan) = @_;
    my $chours = $rowspan / 2; # The table is actually in half cells
    my $cdata = [];
    my $cturns = 0;
    my $cgrouped = 0;

    # Pull in data for the class/hour cell
    foreach my $h (@{$l}) {
        next unless defined($$h{"nodes"});
        foreach my $item (@{$$h{"nodes"}}) {
            next unless defined($$item{"nodes"});
            my $text = $$item{"nodes"}[0]{"text"};

            # The first child need not be a text node. Its (undefined)
            # slot is still stored, as before, so that the positions of
            # the other entries do not shift, but the patterns are only
            # applied to real text.
            $text =~ s/\.$// if defined($text);

            if (defined($text) && $text =~ /^vuorov/i)
            {
                $cturns = 1;
            }
            else
            {
                push(@$cdata, $text);
            }
        }
    }

    # Increased ID if there is data in this class/hour cell
    my $tid = 0;
    if (scalar(@$cdata) > 0) {
        $cid++;
        $tid = $cid;
    }

    # Determine current day: the first day column that is still free
    # at the current hour
    my $cday = 0;
    for (my $x = 0; $x < 7; $x++) {
        if (!defined($$hourFillTable{$lastHour}{$x})) {
            $cday = $x;
            last;
        }
    }

    # Mark every hour slot covered by this cell as taken
    for (my $t = 0; $t < $chours; $t++) {
        $$hourFillTable{$lastHour + $t}{$cday} = $tid;
    }

    if ($tid)
    {
        $maxDays = $cday + 1 if ($cday + 1 > $maxDays);

        # Grouped, if there is another class ID in second slot
        $cgrouped = 1 if (defined($$cdata[1]) && $$cdata[1] =~ /^[A-Z]\d{5,6}[A-Z]*$/);

        my $data;
        if ($cgrouped)
        {
            # The entries alternate between the two groups. The loop is
            # bounded by the number of entries; length($cdata) would be
            # the length of the stringified reference instead.
            my $cdata1 = [];
            my $cdata2 = [];
            for (my $i = 0; $i < scalar(@$cdata); $i += 2)
            {
                push(@$cdata1, $$cdata[$i]) if defined($$cdata[$i]);
                push(@$cdata2, $$cdata[$i+1]) if defined($$cdata[$i+1]);
            }
            $data = [ $cdata1, $cdata2 ];
        }
        else
        {
            $data = [ $cdata ];
        }

        $$hourDefs{$cid} = { "turns" => $cturns, "grouped" => $cgrouped, "start" => $lastHour, "hours" => $chours, "data" => $data };

        push(@{$$hourTable{$cday}}, $tid);
        $totalHours += $chours;
    }
}
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
287 |
162
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
# Convert the first "HH:MM" time found in the argument into seconds
# since midnight. Returns undef when the argument holds no such time.
sub parse_hour_header($)
{
    my ($hours, $minutes) = ($_[0] =~ /(\d+):(\d+)/);
    return undef unless defined($hours);

    my $total_minutes = int($hours) * 60 + int($minutes);
    return $total_minutes * 60;
}
1 | 296 |
72
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
297 |
74
b51ad733b624
Improvements in the parser, now also produces "XML" output.
Matti Hamalainen <ccr@tnsp.org>
parents:
72
diff
changeset
|
sub getDataStruct($$);

# Serialize a Perl data structure in the output format selected by the
# global $opt_mode ("php" or "xml").
#
# Arguments:
#   $tmp   - array reference, hash reference or plain scalar
#   $first - only used in XML mode: when true a scalar is emitted
#            bare, otherwise it is wrapped in <item>...</item>
#
# PHP mode: an array becomes "array(a, b, ...)", a hash becomes a
# comma separated list of '"key" => value' pairs (without a
# surrounding array(), as before), a scalar made of digits only is
# emitted as is, and any other scalar becomes a double-quoted string.
#
# XML mode: an array becomes <group>...</group>, each hash key becomes
# an element named after the key, and scalars follow $first.
#
# An empty array yields an empty string in both modes. Returns the
# serialized string.
sub getDataStruct($$)
{
    my ($tmp, $first) = @_;
    my $php = ($opt_mode eq "php");
    my $xml = ($opt_mode eq "xml");
    my @out = ();

    if (ref($tmp) eq "ARRAY")
    {
        my @str = ();
        foreach my $item (@{$tmp})
        {
            push(@str, getDataStruct($item, 0));
        }
        if (scalar(@str) > 0)
        {
            push(@out, "array(".join(", ", @str).")") if ($php);
            push(@out, "<group>".join("", @str)."</group>") if ($xml);
        }
    }
    elsif (ref($tmp) eq "HASH")
    {
        # Sorted, so that the output does not change from run to run
        # with Perl's hash ordering.
        foreach my $key (sort keys %{$tmp})
        {
            push(@out, "\"".$key."\" => ".getDataStruct($$tmp{$key}, 1)) if ($php);
            push(@out, "<".$key.">".getDataStruct($$tmp{$key}, 1)."</".$key.">") if ($xml);
        }
    }
    else
    {
        # An undefined value is written as an empty string
        my $value = defined($tmp) ? $tmp : "";

        if ($php)
        {
            if ($value =~ /^\d+$/)
            {
                push(@out, $value);
            }
            else
            {
                # The text comes from the parsed page: keep it from
                # ending the PHP string early or being interpolated.
                $value =~ s/([\\"\$])/\\$1/g;
                push(@out, "\"".$value."\"");
            }
        }
        else
        {
            # Keep markup characters in the text from breaking the XML
            $value =~ s/&/&amp;/g;
            $value =~ s/</&lt;/g;
            $value =~ s/>/&gt;/g;
            push(@out, $first ? $value : "<item>".$value."</item>");
        }
    }

    return join(", ", @out) if ($php);
    return join("", @out);
}
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
358 |
162
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
359 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
360 ### |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
361 ### Main program |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
362 ### |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
363 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
# Parse the command line. Arguments starting with "-" are options:
# one of the output modes in $modes, -dump, or -o <filename>. Any
# other argument is taken as the input filename; if several are
# given, the last one is used.
while (defined(my $arg = shift)) {
    if (substr($arg, 0, 1) eq "-") {
        if ($arg =~ /^-($modes)$/o) {
            $opt_mode = $1;
        }
        elsif ($arg eq "-dump") {
            $opt_dump = 1;
        }
        elsif ($arg eq "-o") {
            # Check for presence rather than truth, so that an output
            # file named "0" is accepted as well.
            $opt_outfile = shift;
            die("Output filename option -o requires an argument.\n")
                unless (defined($opt_outfile) && $opt_outfile ne "");
        } else {
            die("Invalid option '$arg'.\n");
        }
    } else {
        $opt_filename = $arg;
    }
}
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
381 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
382 die("Usage: $0 [options] <filename> |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
383 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
384 -php Output a PHP include file with data in arrays (default) |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
385 -xml Output a simple XML file. |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
386 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
387 -o <filename> Set output filename. Default is to use stdout. |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
388 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
389 -dump Dump HTML tree to stdout and quit. |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
390 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
391 ") unless defined($opt_filename); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
392 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
393 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
394 my $data; |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
395 open(my $fh, '<:encoding(iso-8859-1)', $opt_filename) or die("Error opening '$opt_filename': $!\n"); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
396 $data = do { local $/; <$fh> }; |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
397 close($fh); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
398 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
399 die("No data in input.\n") unless (defined($data) && $data ne ""); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
400 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
401 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
402 # Filter out certain unneeded elements |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
403 $data =~ s/<font[^>]*>//ig; |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
404 $data =~ s/<\/font>//ig; |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
405 $data =~ s/<\/?center>//ig; |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
406 $data =~ s/<br>//ig; |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
407 $data =~ s/&nbsp;/ /ig; |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
408 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
409 ### Get some general information |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
410 my $otree = parse_html($data); |
163 | 411 if ($opt_dump) |
412 { | |
162
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
413 print Dumper(fnode($otree, "html")); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
414 exit; |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
415 } |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
416 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
417 my %class = (); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
418 my $body = fnode($otree, "body"); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
419 if (defined($body) && defined($$body{"nodes"})) { |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
420 foreach my $n (@{$$body{"nodes"}}) { |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
421 if ($$n{"name"} eq "text") { |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
422 push(@{$class{"info"}}, $$n{"text"}); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
423 } |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
424 elsif ($$n{"name"} eq "b") { |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
425 push(@{$class{"data"}}, $n); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
426 } |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
427 } |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
428 } |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
429 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
430 # Filter out some more, for easier tree access during table parsing |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
431 $data =~ s/<\/?b>//ig; |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
432 my $tree = parse_html($data); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
433 my $node = fnode(fnode($tree, "body"), "table"); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
434 die("No table element found in document. Perhaps the format has changed? :(\n") unless defined($node); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
435 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
436 |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
437 ### Parse through the HTML document node tree to find the data we need |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
438 |
8
a52a0bdb5ea1
Crop off empty hours (time slots) and days.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
439 # Skip zero position this way (can't use foreach here) |
161 | 440 for (my $i = 1; $i < scalar(@{$q}); $i++) |
441 { | |
1 | 442 my $d = $$q[$i]{"nodes"}; |
72
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
443 if (defined($d)) |
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
444 { |
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
445 foreach my $n (@{$d}) |
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
446 { |
1 | 447 my $l = $$n{"nodes"}[0]{"nodes"}; |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
448 if (defined($l)) |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
449 { |
72
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
450 if ($$n{"args"} =~ /colspan=6\s+rowspan=(\d+)/) |
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
451 { |
162
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
452 parse_hour_data($l, $1); |
1 | 453 } |
72
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
454 elsif ($$n{"args"} =~ /rowspan=2\s+align/) |
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
455 { |
162
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
456 my $qstart = parse_hour_header($$l[0]{"nodes"}[0]{"nodes"}[0]{"text"}); |
70f432e3d1dc
Some remodeling here and there.
Matti Hamalainen <ccr@tnsp.org>
parents:
161
diff
changeset
|
457 my $qend = parse_hour_header($$l[1]{"nodes"}[0]{"nodes"}[0]{"text"}); |
72
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
458 if (defined($qstart) && defined($qend)) |
6fd715063abc
Clean up some parsing operations.
Matti Hamalainen <ccr@tnsp.org>
parents:
58
diff
changeset
|
459 { |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
460 push(@$hourTimes, {"start" => $qstart, "end" => $qend}); |
1 | 461 } |
462 } | |
463 } | |
464 } | |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
465 $lastHour++; |
1 | 466 } |
467 } | |
468 | |
469 | |
8
a52a0bdb5ea1
Crop off empty hours (time slots) and days.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
470 ### Go through the hour fill table, find the first and last non-empty hours (time slots), and crop the range to them |
a52a0bdb5ea1
Crop off empty hours (time slots) and days.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
471 my $flag = 1; |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
472 for (my $y = 0; $y < $lastHour && $flag; $y++) { |
51
0db0d485eb59
Rename some variables, remove "simple" mode.
Matti Hamalainen <ccr@tnsp.org>
parents:
36
diff
changeset
|
473 for (my $x = 0; $x < $maxDays && $flag; $x++) { |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
474 $flag = 0 if (defined($$hourFillTable{$y}{$x}) && $$hourFillTable{$y}{$x} != 0); |
8
a52a0bdb5ea1
Crop off empty hours (time slots) and days.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
475 } |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
476 $firstHour++ if ($flag); |
8
a52a0bdb5ea1
Crop off empty hours (time slots) and days.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
477 } |
a52a0bdb5ea1
Crop off empty hours (time slots) and days.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
478 |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
479 $flag = 1; |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
480 for (my $y = $lastHour - 1; $y >= 0 && $flag; $y--) { |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
481 for (my $x = 0; $x < $maxDays && $flag; $x++) { |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
482 $flag = 0 if (defined($$hourFillTable{$y}{$x}) && $$hourFillTable{$y}{$x} != 0); |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
483 } |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
484 $lastHour-- if ($flag); |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
485 } |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
486 |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
487 |
1 | 488 ### Open output file, if specified |
489 if (defined($opt_outfile)) { | |
490 open(STDOUT, '>', $opt_outfile) or die("Could not open output file '$opt_outfile'.\n"); | |
491 } | |
492 | |
36
bb00a40252e2
Input in ISO-8859-1 and output in UTF-8.
Matti Hamalainen <ccr@tnsp.org>
parents:
33
diff
changeset
|
493 binmode STDOUT, ':encoding(utf-8)'; |
1 | 494 |
495 ### Output data in desired format | |
496 if ($opt_mode eq "php") { | |
497 print "<?\n". | |
498 "\$classInfo = array(\n". | |
499 " \"general\" => array(".join(", ", map { "\"".escape($_)."\""; } @{$class{"info"}})."),\n". | |
500 " \"info\" => array(".join(", ", map { "\"".escape(html_collapse($_, 1))."\""; } @{$class{"data"}})."),\n". | |
54 | 501 " \"tags\" => array(".join(", ", map { "\"".escape(html_collapse($_, 0))."\""; } @{$class{"data"}})."),\n". |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
502 " \"maxDays\" => $maxDays,\n". |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
503 " \"firstHour\" => $firstHour,\n". |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
504 " \"lastHour\" => $lastHour,\n". |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
505 " \"totalHours\" => $totalHours\n". |
1 | 506 ");\n\n"; |
507 | |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
508 print "\$classHourTimes = array(\n"; |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
509 foreach my $chour (@$hourTimes) { |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
510 print " array(\"start\" => ".$$chour{"start"}.", \"end\" => ".$$chour{"end"}."),\n"; |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
511 } |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
512 print ");\n\n"; |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
513 |
54 | 514 print "\$classHourDefs = array(\n"; |
51
0db0d485eb59
Rename some variables, remove "simple" mode.
Matti Hamalainen <ccr@tnsp.org>
parents:
36
diff
changeset
|
515 foreach my $cid (sort { $a <=> $b } keys %{$hourDefs}) { |
74
b51ad733b624
Improvements in the parser, now also produces "XML" output.
Matti Hamalainen <ccr@tnsp.org>
parents:
72
diff
changeset
|
516 print " $cid => array(".getDataStruct($$hourDefs{$cid}, 0)."),\n"; |
1 | 517 } |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
518 print ");\n\n"; |
1 | 519 |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
520 print |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
521 "\$classDayTable = array(\n"; |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
522 for (my $y = 0; $y < $maxDays; $y++) |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
523 { |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
524 if (defined($$hourTable{$y})) |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
525 { |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
526 print " $y => array(".join(", ", @{$$hourTable{$y}})."),\n"; |
1 | 527 } |
528 } | |
529 print ");\n?>\n"; | |
530 } | |
531 | |
532 elsif ($opt_mode eq "xml") { | |
533 print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n". | |
534 "<timetable>\n". | |
535 " <class>\n". | |
77
49a329e87367
Adjust some tag names to be more sane in the XML output.
Matti Hamalainen <ccr@tnsp.org>
parents:
75
diff
changeset
|
536 " <general>".join("", map { "<item>".$_."</item>"; } @{$class{"info"}})."</general>\n". |
49a329e87367
Adjust some tag names to be more sane in the XML output.
Matti Hamalainen <ccr@tnsp.org>
parents:
75
diff
changeset
|
537 " <info>".join("", map { "<item>".html_collapse($_, 1)."</item>"; } @{$class{"data"}})."</info>\n". |
52
b2f45dd616bc
Oops, the output variable names should not have been changed.
Matti Hamalainen <ccr@tnsp.org>
parents:
51
diff
changeset
|
538 " <maxdays>$maxDays</maxdays>\n". |
57
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
539 " <firsthour>$firstHour</firsthour>\n". |
93c87f42c803
New parser output format, parser logic cleaned up a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
55
diff
changeset
|
540 " <lasthour>$lastHour</lasthour>\n". |
58
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
541 " <totalhours>$totalHours</totalhours>\n". |
1 | 542 " </class>\n"; |
543 | |
58
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
544 print " <hours>\n"; |
75
3d9e42477367
More improvements in the parsing and XML output.
Matti Hamalainen <ccr@tnsp.org>
parents:
74
diff
changeset
|
545 my $cid = 0; |
58
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
546 foreach my $chour (@$hourTimes) { |
75
3d9e42477367
More improvements in the parsing and XML output.
Matti Hamalainen <ccr@tnsp.org>
parents:
74
diff
changeset
|
547 print " <hour id=\"".$cid."\"><start>".$$chour{"start"}."</start><end>".$$chour{"end"}."</end></hour>\n"; |
3d9e42477367
More improvements in the parsing and XML output.
Matti Hamalainen <ccr@tnsp.org>
parents:
74
diff
changeset
|
548 $cid++; |
58
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
549 } |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
550 print " </hours>\n\n"; |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
551 |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
552 print " <classes>\n"; |
75
3d9e42477367
More improvements in the parsing and XML output.
Matti Hamalainen <ccr@tnsp.org>
parents:
74
diff
changeset
|
553 foreach $cid (sort { $a <=> $b } keys %{$hourDefs}) { |
74
b51ad733b624
Improvements in the parser, now also produces "XML" output.
Matti Hamalainen <ccr@tnsp.org>
parents:
72
diff
changeset
|
554 print " <class id=\"$cid\">".getDataStruct($$hourDefs{$cid}, 0)."</class>\n"; |
58
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
555 } |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
556 print " </classes>\n\n"; |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
557 |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
558 print " <days>\n"; |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
559 for (my $y = 0; $y < $maxDays; $y++) |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
560 { |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
561 if (defined($$hourTable{$y})) |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
562 { |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
563 print " <day id=\"$y\">".join("", map { "<class>".$_."</class>" } @{$$hourTable{$y}})."</day>\n"; |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
564 } |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
565 } |
119f0cef6498
Work on XML output support (not finished yet.)
Matti Hamalainen <ccr@tnsp.org>
parents:
57
diff
changeset
|
566 print " </days>\n"; |
1 | 567 |
568 print "</timetable>\n"; | |
569 } | |
570 | |
571 close (STDOUT); |