Mercurial > hg > egg-tcls
annotate urllog.tcl @ 4:8c9049f2b2b0
Improve RasiaTube de-mangler.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Fri, 25 Mar 2011 00:40:41 +0200 |
parents | 8003090caa35 |
children | 50b52294e93e |
rev | line source |
---|---|
0 | 1 ########################################################################## |
2 # | |
3 # URLLog v1.99.12 by ccr/TNSP <ccr@tnsp.org> | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
4 # (C) Copyright 2000-2011 Tecnic Software productions (TNSP) |
0 | 5 # |
6 ########################################################################## | |
7 # | |
8 # NOTICE! If you are upgrading to v1.90+ from any older version, you | |
9 # might want to run a conversion script against your URL-database file. | |
10 # | |
11 # It is NOT strictly necessary, but recommended especially if you | |
12 # utilize the "shorturl" functionality. The conversion script is | |
13 # available at < http://tnsp.org/egg-tcls/ > | |
14 # | |
15 ########################################################################## | |
16 ### | |
17 ### HTTP options | |
18 ### | |
19 # Set to 1 if you want to use proxy | |
20 set http_proxy 0 | |
21 | |
22 # Proxy host and port number (only used if enabled above) | |
23 set http_proxy_host "" | |
24 set http_proxy_port 8080 | |
25 | |
26 | |
27 ### | |
28 ### General options | |
29 ### | |
30 | |
31 # Filename where the logged URL data goes | |
32 set urllog_file "data.urllog" | |
33 | |
34 | |
35 # 1 = Verbose: Say messages when URL is OK, bad, etc. | |
36 # 0 = Quiet : Be quiet (only speak if asked with !urlfind, etc) | |
37 set urllog_verbose 1 | |
38 | |
39 | |
40 # 1 = Write some info to the bot's logfile during operation | |
41 # 0 = Don't. | |
42 set urllog_logmsg 1 | |
43 | |
44 | |
45 # 1 = Check URLs for validity and existence before adding. | |
46 # 0 = No checks. Add _anything_ that looks like a URL to the database. | |
47 set urllog_check 1 | |
48 | |
49 | |
50 ### | |
51 ### Search related settings | |
52 ### | |
53 | |
54 # 0 = No search-commands available | |
55 # 1 = Search enabled | |
56 set urllog_search 1 | |
57 | |
58 | |
59 # How many URLs the !urlfind command should show (maximum limit) | |
60 set urllog_showmax_pub 3 | |
61 | |
62 | |
63 # For private-search, this is the default limit (user can change it) | |
64 set urllog_showmax_priv 6 | |
65 | |
66 | |
67 ### | |
68 ### ShortURL-settings | |
69 ### | |
70 | |
71 # 1 = Use ShortURLs | |
72 # 0 = Don't. | |
73 set urllog_shorturl 1 | |
74 | |
75 # Max length of original URL to be shown | |
76 set urllog_shorturl_orig 30 | |
77 | |
78 # Path to PHP/CGI-script that redirects ShortURLs | |
79 set urllog_shorturl_prefix "http://tnsp.org/u/" | |
80 | |
81 | |
82 ### | |
83 ### Message-texts | |
84 ### | |
85 | |
86 # No such host was found | |
87 set urlmsg_nosuchhost "ei tommosta oo!" | |
88 | |
89 # Could not connect to host (I/O errors, etc.) | |
90 set urlmsg_ioerror "kraak, virhe yhdynnässä." | |
91 | |
92 # HTTP timeout | |
93 set urlmsg_timeout "ei jaksa ootella" | |
94 | |
95 # No such document was found | |
96 set urlmsg_errorgettingdoc "siitosvirhe" | |
97 | |
98 # URL was already known (was in database) | |
99 set urlmsg_alreadyknown "wanha!" | |
100 #set urlmsg_alreadyknown "Empiiristen havaintojen perusteella ja tällä sovellutusalueella esiintyneisiin aikaisempiin kontekstuaalisiin ilmaisuihin viitaten uskallan todeta, että sovellukseen ilmoittamasi tietoverkko-osoite oli kronologisti ajatellen varsin postpresentuaalisesti sopimaton ja ennestään hyvin tunnettu." | |
101 | |
102 # No match was found when searched with !urlfind or other command | |
103 set urlmsg_nomatch "Ei osumia." | |
104 | |
105 | |
106 ### | |
107 ### Things that you usually don't need to touch ... | |
108 ### | |
109 | |
110 # What IRC "command" should we use to send messages: | |
111 # (Valid alternatives are "PRIVMSG" and "NOTICE") | |
112 set urllog_preferredmsg "PRIVMSG" | |
113 | |
114 # The valid known Top Level Domains (TLDs), but not the country code TLDs | |
115 # (Now includes the new IANA published TLDs) | |
116 set urllog_tlds "org,com,net,mil,gov,biz,edu,coop,aero,info,museum,name,pro,int" | |
117 | |
118 | |
119 ########################################################################## | |
120 # No need to look below this line | |
121 ########################################################################## | |
122 #------------------------------------------------------------------------- | |
# Script identity; used for the HTTP user agent and the startup/log messages.
set urllog_name "URLLog"
set urllog_version "1.99.12"

# Turn the comma-separated TLD setting into a Tcl list.
set urllog_tlds [split $urllog_tlds ","]
# Flat list of "character, percent-encoding" pairs, in the form expected by
# [string map]; applied to URLs before they are stored.
set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"]
128 | |
129 set urllog_html_ent [split "‪||‬||‎||å|å|Å|Å|é|é|:|:|ä|ä|ö|ö|ä|ä|ö|ö| | |-|-|”|\"|“|\"|»|>>|"|\"|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>|ä|ä|ö|ö|Ä|Ä" "|"] | |
130 | |
131 | |
### HTTP module initialization
package require http
# Identify ourselves as "<name>/<version>" in outgoing HTTP requests.
::http::config -useragent "$urllog_name/$urllog_version"
if {$http_proxy != 0} {
    # Proxy is only configured when enabled in the settings above.
    ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
}


### Binding initializations
# Search commands are registered only when searching is enabled.
if {$urllog_search != 0} {
    bind pub - !urlfind urllog_pub_urlfind
    bind msg - urlfind urllog_msg_urlfind
}

# Hand channel messages (pubm) and topic changes (topc) that match the
# mask "*.*" to urllog_checkmsg.
bind pubm - *.* urllog_checkmsg
bind topc - *.* urllog_checkmsg
# NOTE(review): private-message command "paska" -> urllog_checkmsg2; the
# handler is not visible in this part of the file, purpose unconfirmed.
bind msg - paska urllog_checkmsg2


### Initialization messages
set urllog_message "$urllog_name v$urllog_version (C) 2000-2011 ccr/TNSP"
putlog "$urllog_message"

# Report the active optional features in the bot's log.
if {$http_proxy != 0} {
    putlog " (Using proxy $http_proxy_host:$http_proxy_port)"
}

if {$urllog_check != 0} {
    putlog " (Additional URL validity checks enabled)"
}

if {$urllog_verbose != 0} {
    putlog " (Verbose mode enabled)"
}

if {$urllog_search != 0} {
    putlog " (Search commands enabled)"
}
171 | |
172 #------------------------------------------------------------------------- | |
173 ### Utility functions | |
# urllog_log --
#   Write a message to the bot's log, prefixed with the script name.
#   Nothing is written when the global setting urllog_logmsg equals 0.
#
# Arguments:
#   arg   Message text to log.
proc urllog_log {arg} {
    global urllog_logmsg urllog_name

    # Logging switched off: stay silent.
    if {$urllog_logmsg == 0} {
        return
    }
    putlog "$urllog_name: $arg"
}
181 | |
182 | |
# urllog_ctime --
#   Format a Unix timestamp as "DD.MM.YYYY HH:MM".
#
# Arguments:
#   utime   Seconds since the epoch. An empty string or "*" is treated as 0.
#
# Returns:
#   The formatted time string produced by [clock format].
proc urllog_ctime { utime } {
    # Placeholder values fall back to the epoch.
    switch -exact -- $utime {
        "" -
        "*" { set utime 0 }
    }

    return [clock format $utime -format "%d.%m.%Y %H:%M"]
}
191 | |
192 | |
# urllog_isnumber --
#   Check whether a string consists solely of the characters 0-9.
#
# Arguments:
#   uarg   String to test.
#
# Returns:
#   1 if every character is a decimal digit (this includes the empty
#   string, which has no characters to reject), 0 otherwise.
proc urllog_isnumber {uarg} {
    foreach ch [split $uarg {}] {
        # The first non-digit settles the answer.
        if {![string match {[0-9]} $ch]} {
            return 0
        }
    }

    return 1
}
202 | |
203 | |
# urllog_msg --
#   Send a message to IRC using the configured command
#   (global urllog_preferredmsg, e.g. "PRIVMSG" or "NOTICE").
#
# Arguments:
#   apublic   1 = send to the channel, anything else = send to the nick.
#   anick     Nick to address when the message is not public.
#   achan     Channel to address when the message is public.
#   amsg      Message text.
proc urllog_msg {apublic anick achan amsg} {
    global urllog_preferredmsg

    # Pick the recipient first, then emit a single server line.
    if {$apublic == 1} {
        set target $achan
    } else {
        set target $anick
    }
    putserv "$urllog_preferredmsg $target :$amsg"
}
213 | |
214 | |
# urllog_verb_msg --
#   Send a public (channel) message, but only when verbose mode is on
#   (global urllog_verbose is not 0).
#
# Arguments:
#   anick   Nick passed through to urllog_msg.
#   achan   Channel the message is sent to.
#   amsg    Message text.
proc urllog_verb_msg {anick achan amsg} {
    global urllog_verbose

    # Quiet mode: say nothing.
    if {$urllog_verbose == 0} {
        return
    }
    urllog_msg 1 $anick $achan $amsg
}
222 | |
223 | |
# urllog_convert_ent --
#   Normalize whitespace in a piece of text and replace the sequences
#   listed in the global urllog_html_ent map (case-insensitively).
#
# Arguments:
#   udata   Raw text.
#
# Returns:
#   The text with CR/LF turned into spaces, runs of spaces collapsed to a
#   single space, tabs removed, and the urllog_html_ent replacements applied.
proc urllog_convert_ent {udata} {
    global urllog_html_ent

    # NOTE(review): as visible here this first substitution replaces a space
    # with a space; the pattern may originally have been an HTML entity that
    # was lost when the file was rendered -- confirm against the repository.
    regsub -all " " $udata " " utmp
    # Every step works on the result of the previous one (the original read
    # $udata again here, discarding the first substitution).
    regsub -all "\r" $utmp " " utmp
    regsub -all "\n" $utmp " " utmp
    # Collapse runs of spaces. " +" is used because " *" also matches the
    # empty string and would insert a space between every character.
    regsub -all " +" $utmp " " utmp
    regsub -all "\t" $utmp "" utmp
    return [string map -nocase $urllog_html_ent $utmp]
}
233 | |
234 | |
235 #------------------------------------------------------------------------- | |
# urllog_get_short --
#   Build the bracketed ShortURL text for a numeric URL ID. The ID is
#   written as three "digits" in a base given by the length of the
#   alphabet below, appended to the global urllog_shorturl_prefix.
#
# Arguments:
#   utime   Integer URL ID.
#
# Returns:
#   A string of the form "[ <prefix><c1><c2><c3> ]".
proc urllog_get_short {utime} {
    global urllog_shorturl urllog_shorturl_prefix
    # The alphabet must stay byte-for-byte as is: existing short links were
    # generated with exactly this character order.
    set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
    set ulen [string length $ustr]

    # Expressions are braced so that $utime is treated purely as a value;
    # unbraced, its contents would be substituted a second time by expr.
    set u1 [expr {$utime / ($ulen * $ulen)}]
    set utmp [expr {$utime % ($ulen * $ulen)}]
    set u2 [expr {$utmp / $ulen}]
    set u3 [expr {$utmp % $ulen}]

    return "\[ $urllog_shorturl_prefix[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3] \]"
}
248 | |
249 | |
250 #------------------------------------------------------------------------- | |
# urllog_chop_url --
#   Shorten a URL for display. URLs longer than the global
#   urllog_shorturl_orig setting are cut to exactly that many characters
#   and suffixed with "...".
#
# Arguments:
#   url   URL text.
#
# Returns:
#   The possibly truncated URL.
proc urllog_chop_url {url} {
    global urllog_shorturl_orig
    if {[string length $url] > $urllog_shorturl_orig} {
        # [string range] is inclusive at both ends, so the last index is
        # max-1 to keep exactly max characters.
        return "[string range $url 0 [expr {$urllog_shorturl_orig - 1}]]..."
    } else {
        return $url
    }
}
259 | |
260 #------------------------------------------------------------------------- | |
# urllog_addurl --
#   Add a URL to the database file (global urllog_file) unless it is
#   already stored there. Each database line has the form
#   "<url> <unixtime> <nick> (<host>) <id>", separated by single spaces.
#
# Arguments:
#   urlStr     URL to store.
#   urlNick    Nick who mentioned the URL.
#   urlHost    Host of that nick.
#   urlChan    Channel the URL was seen on (used for the reply).
#   urlTitle   Document title, or "" when none is available.
#
# Returns:
#   1 if the URL was added, 0 if it was already known.
proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} {
    global urlmsg_alreadyknown urllog_file urllog_shorturl

    ### Make sure the database file exists before it is opened for reading
    close [open $urllog_file a+]

    ### Let's check if we already know the URL
    set fd [open $urllog_file r]
    set urlID -1
    set found 0
    # [gets] returns -1 at end of file, so no phantom empty line is processed.
    while {[gets $fd qline] >= 0} {
        set qitems [split $qline " "]
        set qindex [lindex $qitems 4]
        # Track the highest ID seen so far; the next new URL gets ID + 1.
        if {$qindex != "" && $qindex > $urlID} {
            set urlID $qindex
        }
        if {[string equal [lindex $qitems 0] $urlStr]} {
            set found 1
            break
        }
    }
    # Close on every path; previously the "already known" branch returned
    # from inside the loop and leaked the file handle.
    close $fd

    if {$found} {
        urllog_log "URL said by $urlNick ($urlStr) already known"
        if {$urllog_shorturl != 0} {
            set qstr "[urllog_get_short $urlID] "
        } else {
            set qstr ""
        }
        # Mention who posted the URL first, and when.
        append qstr "([lindex $qitems 2]@[urllog_ctime [lindex $qitems 1]])"
        if {[string length $urlTitle] > 0} {
            set qstr "$urlmsg_alreadyknown - '$urlTitle' $qstr"
        } else {
            set qstr "$urlmsg_alreadyknown $qstr"
        }
        urllog_verb_msg $urlNick $urlChan $qstr
        return 0
    }


    ### OK, the URL was not already known - thus we add it
    incr urlID
    set urlTime [unixtime]
    set fd [open $urllog_file a+]
    puts $fd "$urlStr $urlTime $urlNick ($urlHost) $urlID"
    close $fd
    urllog_log "Added URL ($urlNick@$urlChan): $urlStr"


    ### Let's say something, to confirm that everything went well.
    if {$urllog_shorturl != 0} {
        set qstr "[urllog_get_short $urlID] "
    } else {
        set qstr ""
    }
    if {[string length $urlTitle] > 0} {
        urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr"
    } else {
        urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr"
    }

    return 1
}
319 | |
320 | |
321 #------------------------------------------------------------------------- | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
# urllog_http_handler --
#   Transfer-progress handler that cuts a download short. Once more than
#   64000 bytes have been received, the status element of the transfer's
#   state array is set to "ok".
#   NOTE(review): presumably registered as the -progress callback of
#   ::http::geturl; the registration is not visible in this part of the file.
#
# Arguments:
#   utoken   Name of the global state array of the transfer.
#   utotal   Expected total size (unused).
#   ucurr    Number of bytes received so far.
proc urllog_http_handler {utoken utotal ucurr} {
    upvar #0 $utoken state

    # Still within the limit: let the transfer continue untouched.
    if {$ucurr <= 64000} {
        return
    }
    set state(status) "ok"
}
331 | |
332 #------------------------------------------------------------------------- | |
333 proc urllog_checkurl {urlStr urlNick urlHost urlChan} { | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
334 global botnick urllog_html urllog_tlds urllog_check urllog_file |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
335 global urlmsg_nosuchhost urlmsg_ioerror urlmsg_timeout urlmsg_errorgettingdoc |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
336 global urllog_httprep urllog_shorturl_prefix urllog_shorturl urllog_encoding |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
337 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
338 ### Print status to bot's log |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
339 urllog_log "$urlStr ($urlNick@$urlChan)" |
0 | 340 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
341 ### Try to determine the URL protocol component (if it is missing) |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
342 set u_checktld 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
343 if {[string match "*www.*" $urlStr] && ![string match "http://*" $urlStr] && ![string match "https://*" $urlStr]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
344 set urlStr "http://$urlStr" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
345 } elseif {[string match "*ftp.*" $urlStr] && ![string match "ftp://*" $urlStr]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
346 set urlStr "ftp://$urlStr" |
0 | 347 } |
348 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
349 if {[regexp "(ftp|http|https)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr u_match u_prefix ni1 ni2 ni3 ni4]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
350 # Check if the IP is on local network |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
351 if {($ni1 == 127) || ($ni1 == 10) || ($ni1 == 192 && $ni2 == 168) || ($ni1 == 0)} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
352 urllog_log "URL pointing to local or invalid network, ignored ($urlStr)." |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
353 return 0 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
354 } |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
355 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
356 # Skip TLD check for URLs with IP address |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
357 set u_checktld 0 |
0 | 358 } |
359 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
360 if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
361 urllog_log "Ignoring ShortURL." |
0 | 362 return 0 |
363 } | |
364 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
365 ### Check the PORT (if the ":" is there) |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
366 set u_record [split $urlStr "/"] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
367 set u_hostname [lindex $u_record 2] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
368 set u_port [lindex [split $u_hostname ":"] end] |
0 | 369 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
370 if {![urllog_isnumber $u_port] && $u_port != "" && $u_port != $u_hostname} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
371 urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $u_port" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
372 return 0 |
0 | 373 } |
374 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
375 # Default to port 80 (HTTP) |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
376 if {![urllog_isnumber $u_port]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
377 set u_port 80 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
378 } |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
379 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
380 ### Is it a http or ftp url? (FIX ME!) |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
381 if {[string range $urlStr 0 3] != "http" && [string range $urlStr 0 2] != "ftp"} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
382 urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED TYPE (not HTTP or FTP)" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
383 return 0 |
0 | 384 } |
385 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
386 ### Check the Top Level Domain (TLD) validity |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
387 if {$u_checktld != 0} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
388 set u_sane [lindex [split $u_hostname "."] end] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
389 set u_tld [lindex [split $u_sane ":"] 0] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
390 set u_found 0 |
0 | 391 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
392 if {[string length $u_tld] == 2} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
393 # Assume all 2-letter domains to be valid :) |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
394 set u_found 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
395 } else { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
396 # Check our list of known TLDs |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
397 foreach itld $urllog_tlds { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
398 if {[string match $itld $u_tld]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
399 set u_found 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
400 } |
0 | 401 } |
402 } | |
403 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
404 if {$u_found == 0} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
405 urllog_log "Broken URL from $urlNick: ($urlStr) illegal TLD: $u_tld." |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
406 return 0 |
0 | 407 } |
408 } | |
409 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
410 set urlStr [string map $urllog_httprep $urlStr] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
411 |
0 | 412 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
413 ### Do we perform additional optional checks? |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
414 if {$urllog_check == 0 || [string range $urlStr 0 6] != "http://"} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
415 # No optional checks, just add the URL |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
416 urllog_addurl $urlStr $urlNick $urlHost $urlChan "" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
417 return 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
418 } |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
419 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
420 ### Does the document pointed by the URL exist? |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
421 if {[catch {set utoken [::http::geturl $urlStr -progress urllog_http_handler -blocksize 1024 -timeout 3000]} uerrmsg]} { |
0 | 422 urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)" |
423 urllog_log "HTTP request failed: $uerrmsg" | |
424 return 0 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
425 } |
0 | 426 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
427 if {[::http::status $utoken] == "timeout"} { |
0 | 428 urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout" |
429 urllog_log "HTTP request timed out ($urlStr)" | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
430 return 0 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
431 } |
0 | 432 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
433 if {[::http::status $utoken] != "ok"} { |
0 | 434 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::error $utoken])" |
435 urllog_log "Error in HTTP transaction: [::http::error $utoken] ($urlStr)" | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
436 return 0 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
437 } |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
438 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
439 # Fixme! Handle redirects! |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
440 set ucode [::http::ncode $utoken] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
441 if {$ucode >= 200 && $ucode <= 309} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
442 set udata [::http::data $utoken] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
443 set umatches [regexp -nocase -inline -- "<meta.\*\?content=\".\*\?charset=(\[^\"\]*)\"/>" $udata] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
444 set uconvert 0 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
445 if {[llength $umatches] > 0} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
446 set uencoding [lindex $umatches 1] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
447 if {[string length $uencoding] > 3} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
448 set uconvert 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
449 } |
0 | 450 } |
451 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
452 set umatches [regexp -nocase -inline -- "<title>(.\*\?)</title>" $udata] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
453 if {[llength $umatches] > 0} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
454 set urlTitle [lindex $umatches 1] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
455 if {$uconvert != 0} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
456 set urlTitle [encoding convertfrom $uencoding $urlTitle] |
0 | 457 } |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
458 set urlTitle [urllog_convert_ent $urlTitle] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
459 regsub -all "(^ *| *$)" $urlTitle "" urlTitle |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
460 } else { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
461 set urlTitle "" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
462 } |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
463 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
464 # Rasiatube hack |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
465 if {[string match "*/rasiatube/view*" $urlStr]} { |
4
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
466 set rasia 0 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
467 set umatches [regexp -nocase -inline -- "<link rel=\"video_src\"\.\*\?file=(http://\[^&\]+)&" $udata] |
0 | 468 if {[llength $umatches] > 0} { |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
469 set urlStr [lindex $umatches 1] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
470 regsub -all "\/v\/" $urlStr "\/watch\?v=" urlStr |
4
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
471 set rasia 1 |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
472 } else { |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
473 set umatches [regexp -nocase -inline -- "SWFObject.\"(\[^\"\]+)\", *\"flashvideo" $udata] |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
474 if {[llength $umatches] > 0} { |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
475 set urlStr [lindex $umatches 1] |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
476 regsub "http:\/\/www.dailymotion.com\/swf\/" $urlStr "http:\/\/www.dailymotion.com\/video\/" urlStr |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
477 set rasia 1 |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
478 } |
0 | 479 } |
4
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
480 |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
481 if {$rasia != 0} { |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
482 urllog_log "RasiaTube mangler: $urlStr" |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
483 urllog_verb_msg $urlNick $urlChan "Korjataan haiseva rasiatube-linkki: $urlStr" |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
484 } |
0 | 485 } |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
486 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
487 urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
488 return 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
489 } else { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
490 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::code $utoken])" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
491 urllog_log "[::http::code $utoken] - $urlStr" |
0 | 492 } |
493 | |
494 ::http::cleanup $utoken | |
495 } | |
496 | |
497 | |
498 #------------------------------------------------------------------------- | |
proc urllog_checkmsg {nick uhost hand chan text} {
    # Scan one line of channel text and hand every word that looks like an
    # URL to urllog_checkurl.
    #
    # nick, uhost, chan - who said the text and where; forwarded unchanged
    #                     to urllog_checkurl.
    # hand              - accepted but not used by this procedure.
    # text              - the message; it is split on single spaces.
    #
    # Always returns 0.

    # A wildcard nick is not expected here; log it and do nothing.
    if {$nick == "*"} {
        urllog_log "urllog_checkmsg: nick was wc, this should not happen."
        return 0
    }

    # Substring globs that mark a word as a candidate URL.
    set globs [list "*http://*" "*ftp://*" "*www.*" "*ftp.*"]

    # Scheme followed by a dotted-quad address. This is the only test that
    # mentions https, so an https word with a plain host name is picked up
    # only when it also contains "www." or "ftp.".
    set ipRegex "(ftp|http|https)://\[0-9\]\{1,3\}\\.\[0-9\]\{1,3\}\\.\[0-9\]\{1,3\}\\.\[0-9\]\{1,3\}"

    foreach word [split $text " "] {
        set candidate 0
        foreach g $globs {
            if {[string match $g $word]} {
                set candidate 1
                break
            }
        }
        if {!$candidate} {
            set candidate [regexp $ipRegex $word]
        }
        if {$candidate} {
            urllog_checkurl $word $nick $uhost $chan
        }
    }

    return 0
}
517 | |
518 | |
519 #------------------------------------------------------------------------- | |
520 ### Find from database according to patterns, up to imax results. | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
proc urllog_urlfind {ipatterns imax} {
    # Search the URL database file for records that satisfy every rule.
    #
    # ipatterns - list of rules, each a string "<op> <glob>", where <op> is
    #               +  field 0 of the record (the URL) must match <glob>
    #               -  field 0 of the record (the URL) must NOT match <glob>
    #               %  field 2 of the record must match <glob>
    #             Rules with any other operator are ignored by this
    #             procedure. Matching is case-insensitive: both the rule
    #             and the record are lower-cased before comparison.
    # imax      - maximum number of records to return.
    #
    # Returns a list holding the last imax matching records in file order;
    # each element is the raw line as stored (original case). Returns an
    # empty list when the database file is missing or unreadable.
    global urllog_file

    # Until something has been logged the file may not exist; treat that as
    # "no matches" instead of letting open raise an error in the caller.
    if {![file readable $urllog_file]} {
        return {}
    }

    # Parse the rules once instead of once per database line. The rule is
    # split on a single space on purpose, so that glob characters typed by
    # the user are never interpreted as Tcl list syntax.
    set rules {}
    foreach ipattern $ipatterns {
        set parts [split [string tolower $ipattern] " "]
        lappend rules [lindex $parts 0] [lindex $parts 1]
    }

    set iresults {}
    set fd [open $urllog_file r]

    # gets returns -1 at end of file, so no spurious empty record is read.
    while {[gets $fd line] >= 0} {
        if {[string length $line] == 0} {
            continue
        }

        set irecord [split [string tolower $line] " "]
        set iurl [lindex $irecord 0]
        set iname [lindex $irecord 2]

        # A record is kept only if no rule rejects it.
        set imatched 1
        foreach {op glob} $rules {
            switch -exact -- $op {
                "+" { if {![string match $glob $iurl]} { set imatched 0 } }
                "-" { if {[string match $glob $iurl]} { set imatched 0 } }
                "%" { if {![string match $glob $iname]} { set imatched 0 } }
            }
            if {!$imatched} {
                break
            }
        }

        if {$imatched} {
            lappend iresults $line
        }
    }

    close $fd

    # Keep only the last imax results. A start index below zero is clamped
    # by lrange, so fewer than imax matches are returned unchanged.
    return [lrange $iresults [expr {[llength $iresults] - $imax}] end]
}
564 | |
565 | |
566 #------------------------------------------------------------------------- | |
567 ### Parse arguments, find and show the results | |
proc urllog_find {unick uhand uchan utext upublic} {
    # Parse a search query, look it up with urllog_urlfind and send the
    # results to the requester through urllog_msg.
    #
    # unick, uhand - nick and handle of the requester (used for logging and
    #                for addressing the reply).
    # uchan        - channel passed on to urllog_msg.
    # utext        - the query: space-separated tokens. A token starting
    #                with -, +, % or @ becomes the rule "<mark> *<rest>*";
    #                any other token becomes "+ *<token>*".
    # upublic      - 0 selects the private result limit, anything else the
    #                public one; the value is also passed to urllog_msg.
    #
    # Always returns 0.
    global botnick urllog_name urllog_version urllog_shorturl
    global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch

    urllog_log "$unick/$uhand searched URL: $utext"

    # The rule list must exist even when the query is empty; previously an
    # empty query left it unset and reading it raised a Tcl error.
    set ipatlist {}
    foreach ftoken [split $utext " "] {
        # Consecutive spaces produce empty tokens. They used to become a
        # match-everything rule, so skipping them yields the same results.
        if {[string length $ftoken] == 0} {
            continue
        }

        set foomark [string index $ftoken 0]
        switch -exact -- $foomark {
            "-" -
            "+" -
            "%" -
            "@" {
                lappend ipatlist "$foomark *[string range $ftoken 1 end]*"
            }
            default {
                lappend ipatlist "+ *$ftoken*"
            }
        }
    }

    # Private replies and public replies have separate result limits.
    if {$upublic == 0} {
        set imax $urllog_showmax_priv
    } else {
        set imax $urllog_showmax_pub
    }
    set iresults [urllog_urlfind $ipatlist $imax]

    if {[llength $iresults] == 0} {
        urllog_msg $upublic $unick $uchan $urlmsg_nomatch
        return 0
    }

    # Record fields used here: 0 = URL, 1 = timestamp, 2 = name, 4 = short ID.
    set j 0
    foreach i $iresults {
        incr j
        set foo [split $i " "]
        set shortURL [lindex $foo 0]
        set shortID [lindex $foo 4]

        if {$urllog_shorturl != 0 && $shortID != ""} {
            set shortURL "$shortURL [urllog_get_short $shortID]"
        }

        urllog_msg $upublic $unick $uchan "#$j: $shortURL ([lindex $foo 2]@[urllog_ctime [lindex $foo 1]])"
    }

    return 0
}
618 | |
619 | |
620 #------------------------------------------------------------------------- | |
621 ### Finding binded functions | |
proc urllog_pub_urlfind {unick uhost uhand uchan utext} {
    # Channel-side entry point for URL searches: forwards the request to
    # urllog_find with the public flag set to 1. uhost is accepted but not
    # used. Always returns 0.
    set isPublic 1
    urllog_find $unick $uhand $uchan $utext $isPublic

    return 0
}
626 | |
627 | |
proc urllog_msg_urlfind {unick uhost uhand utext} {
    # Private-message entry point for URL searches: forwards the request to
    # urllog_find with an empty channel and the public flag set to 0.
    # uhost is accepted but not used. Always returns 0.
    set noChannel ""
    set isPublic 0
    urllog_find $unick $uhand $noChannel $utext $isPublic

    return 0
}
0 | 632 |
633 | |
634 #------------------------------------------------------------------------- | |
proc urllog_checkmsg2 {unick uhost uhand utext} {
    # Submit the whole message text as a single URL candidate to
    # urllog_checkurl, using the fixed placeholder channel "#CHANNEL".
    # uhand is accepted but not used. Always returns 0.
    set placeholderChan "#CHANNEL"
    urllog_checkurl $utext $unick $uhost $placeholderChan

    return 0
}
639 | |
640 | |
641 # end of script |