Mercurial > hg > lukkari
comparison parsedata.pl @ 170:34ce8339b956
Cosmetic reformatting.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 24 Aug 2015 11:00:13 +0300 |
parents | 04c4f1a95dbd |
children | eaac0a84a7d2 |
comparison
equal
deleted
inserted
replaced
169:04c4f1a95dbd | 170:34ce8339b956 |
---|---|
1 #!/usr/bin/perl -w | 1 #!/usr/bin/perl -w |
2 # | 2 # |
3 # Fetch and parse HTML format class timetable into more sane formats | 3 # Fetch and parse HTML format class timetable into more sane formats |
4 # (C) Copyright 2010-2012 Matti Hämäläinen <ccr@tnsp.org> | 4 # (C) Copyright 2010-2015 Matti Hämäläinen <ccr@tnsp.org> |
5 # | 5 # |
6 use strict; | 6 use strict; |
7 use Data::Dumper; | 7 use Data::Dumper; |
8 use HTML::Entities; | 8 use HTML::Entities; |
9 | 9 |
28 | 28 |
29 | 29 |
30 sub str_trim($) | 30 sub str_trim($) |
31 { | 31 { |
32 my $str = $_[0]; | 32 my $str = $_[0]; |
33 if (defined($str)) { | 33 if (defined($str)) |
34 { | |
34 $str =~ s/^\s*//; | 35 $str =~ s/^\s*//; |
35 $str =~ s/\s*$//; | 36 $str =~ s/\s*$//; |
36 } | 37 } |
37 return $str; | 38 return $str; |
38 } | 39 } |
59 $token =~ s/^\s*//; | 60 $token =~ s/^\s*//; |
60 | 61 |
61 return undef unless (substr($token, 0, 1) eq '"'); | 62 return undef unless (substr($token, 0, 1) eq '"'); |
62 $token = substr($token, 1); | 63 $token = substr($token, 1); |
63 | 64 |
64 while (defined($token)) { | 65 while (defined($token)) |
66 { | |
65 my $tmp = $token; | 67 my $tmp = $token; |
66 $tmp =~ s/\s*$//; | 68 $tmp =~ s/\s*$//; |
67 if (substr($tmp, -1) eq '"') { | 69 if (substr($tmp, -1) eq '"') |
70 { | |
68 $str .= substr($tmp, 0, -1); | 71 $str .= substr($tmp, 0, -1); |
69 return $str; | 72 return $str; |
70 } else { | 73 } |
74 else | |
75 { | |
71 $str .= $token; | 76 $str .= $token; |
72 } | 77 } |
73 $token = shift(@$tokens); | 78 $token = shift(@$tokens); |
74 } | 79 } |
75 return undef; | 80 return undef; |
80 | 85 |
81 sub parse_html_tree($$) | 86 sub parse_html_tree($$) |
82 { | 87 { |
83 my ($tokens, $tree) = @_; | 88 my ($tokens, $tree) = @_; |
84 | 89 |
85 while (my $token = pop_token($tokens)) { | 90 while (my $token = pop_token($tokens)) |
86 if ($token =~ /^<[!\/]?[a-zA-Z]+/) { | 91 { |
92 if ($token =~ /^<[!\/]?[a-zA-Z]+/) | |
93 { | |
87 $token = lc($token); | 94 $token = lc($token); |
88 if ($token =~ /^<\!.*>$/) { | 95 if ($token =~ /^<\!.*>$/) |
96 { | |
89 # Ignore comments etc. | 97 # Ignore comments etc. |
90 } elsif ($token =~ /^<([a-z]+)(.*)>$/) { | 98 } |
99 elsif ($token =~ /^<([a-z]+)(.*)>$/) | |
100 { | |
91 my ($name, $args) = ($1, $2); | 101 my ($name, $args) = ($1, $2); |
92 if ($name eq "meta" || $name eq "img") { | 102 if ($name eq "meta" || $name eq "img") |
103 { | |
93 my $tmp = {}; | 104 my $tmp = {}; |
94 parse_html_tree($tokens, $tree); | 105 parse_html_tree($tokens, $tree); |
95 return $tree; | 106 return $tree; |
96 } else { | 107 } |
108 else | |
109 { | |
97 my $tmp = { "name" => $name, "args" => str_trim($args) }; | 110 my $tmp = { "name" => $name, "args" => str_trim($args) }; |
98 parse_html_tree($tokens, $tmp); | 111 parse_html_tree($tokens, $tmp); |
99 push(@{$$tree{"nodes"}}, $tmp); | 112 push(@{$$tree{"nodes"}}, $tmp); |
100 } | 113 } |
101 } elsif ($token =~ /^<\/([a-z]+)>$/) { | 114 } |
115 elsif ($token =~ /^<\/([a-z]+)>$/) | |
116 { | |
102 return $tree; | 117 return $tree; |
103 } else { | 118 } |
119 else | |
120 { | |
104 die("HORROR TERROR ELITE: $token\n"); | 121 die("HORROR TERROR ELITE: $token\n"); |
105 } | 122 } |
106 } else { | 123 } |
124 else | |
125 { | |
107 $token = str_trim(decode_entities($token)); | 126 $token = str_trim(decode_entities($token)); |
108 push(@{$$tree{"nodes"}}, { "name" => "text", "args" => "", "text" => $token }) if length($token) > 0; | 127 push(@{$$tree{"nodes"}}, { "name" => "text", "args" => "", "text" => $token }) if length($token) > 0; |
109 } | 128 } |
110 } | 129 } |
111 | 130 |
129 | 148 |
130 sub html_find_node($$$) | 149 sub html_find_node($$$) |
131 { | 150 { |
132 my ($node, $name, $args) = @_; | 151 my ($node, $name, $args) = @_; |
133 | 152 |
134 if (defined($node)) { | 153 if (defined($node)) |
135 if (ref($node) eq "ARRAY") { | 154 { |
136 foreach my $n (@$node) { | 155 if (ref($node) eq "ARRAY") |
156 { | |
157 foreach my $n (@$node) | |
158 { | |
137 my $tmp = html_find_node($n, $name, $args); | 159 my $tmp = html_find_node($n, $name, $args); |
138 # Must do it like this, in order not to break the loop | 160 # Must do it like this, in order not to break the loop |
139 return $tmp if defined($tmp); | 161 return $tmp if defined($tmp); |
140 } | 162 } |
141 } elsif (ref($node) eq "HASH") { | 163 } |
142 if (defined($$node{"name"})) { | 164 elsif (ref($node) eq "HASH") |
143 if ($$node{"name"} eq $name) { | 165 { |
166 if (defined($$node{"name"})) | |
167 { | |
168 if ($$node{"name"} eq $name) | |
169 { | |
144 if ($args ne "") { | 170 if ($args ne "") { |
145 if (defined($$node{"args"}) && $$node{"args"} =~ /$args/) { | 171 if (defined($$node{"args"}) && $$node{"args"} =~ /$args/) |
146 } else { | 172 { |
173 } | |
174 else | |
175 { | |
147 return html_find_node($$node{"nodes"}, $name, $args); | 176 return html_find_node($$node{"nodes"}, $name, $args); |
148 } | 177 } |
149 } | 178 } |
150 return $node; | 179 return $node; |
151 } else { | 180 } |
181 else | |
182 { | |
152 return html_find_node($$node{"nodes"}, $name, $args); | 183 return html_find_node($$node{"nodes"}, $name, $args); |
153 } | 184 } |
154 } | 185 } |
155 } | 186 } |
156 } | 187 } |
183 sub html_collapse($$) | 214 sub html_collapse($$) |
184 { | 215 { |
185 my ($node, $strip) = @_; | 216 my ($node, $strip) = @_; |
186 my $str = ""; | 217 my $str = ""; |
187 | 218 |
188 if ($$node{"name"} eq "text") { | 219 if ($$node{"name"} eq "text") |
220 { | |
189 $str .= $$node{"text"}; | 221 $str .= $$node{"text"}; |
190 } else { | 222 } |
223 else | |
224 { | |
191 $str .= "<".$$node{"name"}.">" unless ($strip); | 225 $str .= "<".$$node{"name"}.">" unless ($strip); |
192 foreach my $n (@{$$node{"nodes"}}) { | 226 foreach my $n (@{$$node{"nodes"}}) |
227 { | |
193 $str .= html_collapse($n, $strip); | 228 $str .= html_collapse($n, $strip); |
194 } | 229 } |
195 $str .= "</".$$node{"name"}.">" unless ($strip); | 230 $str .= "</".$$node{"name"}.">" unless ($strip); |
196 } | 231 } |
197 | 232 |
206 my $cdata = []; | 241 my $cdata = []; |
207 my $cturns = 0; | 242 my $cturns = 0; |
208 my $cgrouped = 0; | 243 my $cgrouped = 0; |
209 | 244 |
210 # Pull in data for the class/hour cell | 245 # Pull in data for the class/hour cell |
211 foreach my $h (@{$l}) { | 246 foreach my $h (@{$l}) |
212 if (defined($$h{"nodes"})) { | 247 { |
213 foreach my $b (@{$$h{"nodes"}}) { | 248 if (defined($$h{"nodes"})) |
214 if (defined($$b{"nodes"})) { | 249 { |
250 foreach my $b (@{$$h{"nodes"}}) | |
251 { | |
252 if (defined($$b{"nodes"})) | |
253 { | |
215 my $text = $$b{"nodes"}[0]{"text"}; | 254 my $text = $$b{"nodes"}[0]{"text"}; |
216 $text =~ s/\.$//; | 255 $text =~ s/\.$//; |
217 | 256 |
218 if ($text =~ /^vuorov/i) | 257 if ($text =~ /^vuorov/i) |
219 { | 258 { |
237 $tid = 0; | 276 $tid = 0; |
238 } | 277 } |
239 | 278 |
240 # Determine current day | 279 # Determine current day |
241 my $cday = 0; | 280 my $cday = 0; |
242 for (my $x = 0; $x < 7; $x++) { | 281 for (my $x = 0; $x < 7; $x++) |
243 if (!defined($$hourFillTable{$lastHour}{$x})) { | 282 { |
283 if (!defined($$hourFillTable{$lastHour}{$x})) | |
284 { | |
244 $cday = $x; | 285 $cday = $x; |
245 last; | 286 last; |
246 } | 287 } |
247 } | 288 } |
248 for (my $t = 0; $t < $chours; $t++) { | 289 for (my $t = 0; $t < $chours; $t++) |
290 { | |
249 $$hourFillTable{$lastHour + $t}{$cday} = $tid; | 291 $$hourFillTable{$lastHour + $t}{$cday} = $tid; |
250 } | 292 } |
251 | 293 |
252 if ($tid) | 294 if ($tid) |
253 { | 295 { |