Mercurial > hg > egg-tcls
annotate urllog.tcl @ 49:7ecb0a0bfb36
urllog: Bump version.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Wed, 07 Sep 2011 14:47:34 +0300 |
parents | 2658500d1b52 |
children | f69363fc1f61 |
rev | line source |
---|---|
0 | 1 ########################################################################## |
2 # | |
49 | 3 # URLLog v2.0.1 by ccr/TNSP <ccr@tnsp.org> |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
4 # (C) Copyright 2000-2011 Tecnic Software productions (TNSP) |
0 | 5 # |
6 ########################################################################## | |
7 # | |
8 # NOTICE! If you are upgrading to v1.90+ from any older version, you | |
9 # might want to run a conversion script against your URL-database file. | |
10 # | |
11 # It is NOT strictly necessary, but recommended especially if you | |
12 # utilize the "shorturl" functionality. The conversion script is | |
13 # available at < http://tnsp.org/egg-tcls/ > | |
14 # | |
15 ########################################################################## | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
16 |
0 | 17 ### |
18 ### HTTP options | |
19 ### | |
20 # Set to 1 if you want to use proxy | |
21 set http_proxy 0 | |
22 | |
23 # Proxy host and port number (only used if enabled above) | |
24 set http_proxy_host "" | |
25 set http_proxy_port 8080 | |
26 | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
27 set http_tls_support 0 |
0 | 28 |
29 ### | |
30 ### General options | |
31 ### | |
32 | |
33 # Filename where the logged URL data goes | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
34 set urllog_db_file "urllog.sqlite" |
0 | 35 |
36 | |
37 # 1 = Verbose: Say messages when URL is OK, bad, etc. | |
38 # 0 = Quiet : Be quiet (only speak if asked with !urlfind, etc) | |
39 set urllog_verbose 1 | |
40 | |
41 | |
42 # 1 = Put some info to bot's Logfile during operation | |
43 # 0 = Don't. | |
44 set urllog_logmsg 1 | |
45 | |
46 | |
47 # 1 = Check URLs for validity and existence before adding. | |
48 # 0 = No checks. Add _anything_ that looks like an URL to the database. | |
49 set urllog_check 1 | |
50 | |
51 | |
52 ### | |
53 ### Search related settings | |
54 ### | |
55 | |
56 # 0 = No search-commands available | |
57 # 1 = Search enabled | |
58 set urllog_search 1 | |
59 | |
60 | |
61 # How many URL's should the !urlfind command show (maximum limit) | |
62 set urllog_showmax_pub 3 | |
63 | |
64 | |
65 # For private-search, this is the default limit (user can change it) | |
66 set urllog_showmax_priv 6 | |
67 | |
68 | |
69 ### | |
70 ### ShortURL-settings | |
71 ### | |
72 | |
73 # 1 = Use ShortURLs | |
74 # 0 = Don't. | |
75 set urllog_shorturl 1 | |
76 | |
77 # Max length of original URL to be shown | |
78 set urllog_shorturl_orig 30 | |
79 | |
80 # Path to PHP/CGI-script that redirects ShortURLs | |
81 set urllog_shorturl_prefix "http://tnsp.org/u/" | |
82 | |
83 | |
84 ### | |
85 ### Message-texts | |
86 ### | |
87 | |
88 # No such host was found | |
89 set urlmsg_nosuchhost "ei tommosta oo!" | |
90 | |
91 # Could not connect host (I/O errors etc) | |
92 set urlmsg_ioerror "kraak, virhe yhdynnässä." | |
93 | |
94 # HTTP timeout | |
95 set urlmsg_timeout "ei jaksa ootella" | |
96 | |
97 # No such document was found | |
98 set urlmsg_errorgettingdoc "siitosvirhe" | |
99 | |
100 # URL was already known (was in database) | |
101 set urlmsg_alreadyknown "wanha!" | |
102 #set urlmsg_alreadyknown "Empiiristen havaintojen perusteella ja tällä sovellutusalueella esiintyneisiin aikaisempiin kontekstuaalisiin ilmaisuihin viitaten uskallan todeta, että sovellukseen ilmoittamasi tietoverkko-osoite oli kronologisti ajatellen varsin postpresentuaalisesti sopimaton ja ennestään hyvin tunnettu." | |
103 | |
104 # No match was found when searched with !urlfind or other command | |
105 set urlmsg_nomatch "Ei osumia." | |
106 | |
107 | |
108 ### | |
109 ### Things that you usually don't need to touch ... | |
110 ### | |
111 | |
112 # What IRC "command" should we use to send messages: | |
113 # (Valid alternatives are "PRIVMSG" and "NOTICE") | |
114 set urllog_preferredmsg "PRIVMSG" | |
115 | |
116 # The valid known Top Level Domains (TLDs), but not the country code TLDs | |
117 # (Now includes the new IANA published TLDs) | |
118 set urllog_tlds "org,com,net,mil,gov,biz,edu,coop,aero,info,museum,name,pro,int" | |
119 | |
120 | |
121 ########################################################################## | |
122 # No need to look below this line | |
123 ########################################################################## | |
124 #------------------------------------------------------------------------- | |
125 set urllog_name "URLLog" | |
49 | 126 set urllog_version "2.0.1" |
0 | 127 |
128 set urllog_tlds [split $urllog_tlds ","] | |
129 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] | |
130 | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
131 set urllog_html_ent [split "‏||—|-|‪||‬||‎||å|å|Å|Å|é|é|:|:|ä|ä|ö|ö|ä|ä|ö|ö| | |-|-|”|\"|“|\"|»|>>|"|\"|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>|ä|ä|ö|ö|Ä|Ä" "|"] |
0 | 132 |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
133 ### Require packages |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
134 package require sqlite3 |
0 | 135 package require http |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
136 |
0 | 137 ### Binding initializations |
138 if {$urllog_search != 0} { | |
28 | 139 bind pub - !urlfind urllog_pub_urlfind |
140 bind msg - urlfind urllog_msg_urlfind | |
0 | 141 } |
142 | |
143 bind pubm - *.* urllog_checkmsg | |
144 bind topc - *.* urllog_checkmsg | |
145 | |
146 | |
147 ### Initialization messages | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
148 set urllog_message "$urllog_name v$urllog_version (C) 2000-2011 ccr/TNSP" |
0 | 149 putlog "$urllog_message" |
150 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
151 ### HTTP module initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
152 ::http::config -useragent "$urllog_name/$urllog_version" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
153 if {$http_proxy != 0} { |
28 | 154 ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
155 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
156 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
157 if {$http_tls_support != 0} { |
28 | 158 package require tls |
159 ::http::register https 443 [list ::tls::socket -request 1 -require 1 -cadir "/etc/certs/"] | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
160 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
161 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
162 ### SQLite database initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
163 if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} { |
28 | 164 putlog " Could not open SQLite3 database '$urllog_db_file': $uerrmsg" |
165 exit 2 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
166 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
167 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
168 |
0 | 169 if {$http_proxy != 0} { |
28 | 170 putlog " (Using proxy $http_proxy_host:$http_proxy_port)" |
0 | 171 } |
172 | |
173 if {$urllog_check != 0} { | |
28 | 174 putlog " (Additional URL validity checks enabled)" |
0 | 175 } |
176 | |
177 if {$urllog_verbose != 0} { | |
28 | 178 putlog " (Verbose mode enabled)" |
0 | 179 } |
180 | |
181 if {$urllog_search != 0} { | |
28 | 182 putlog " (Search commands enabled)" |
0 | 183 } |
184 | |
185 #------------------------------------------------------------------------- | |
186 ### Utility functions | |
187 proc urllog_log {arg} { | |
28 | 188 global urllog_logmsg urllog_name |
0 | 189 |
28 | 190 if {$urllog_logmsg != 0} { |
191 putlog "$urllog_name: $arg" | |
192 } | |
0 | 193 } |
194 | |
195 | |
196 proc urllog_ctime { utime } { | |
197 | |
28 | 198 if {$utime == "" || $utime == "*"} { |
199 set utime 0 | |
200 } | |
0 | 201 |
28 | 202 return [clock format $utime -format "%d.%m.%Y %H:%M"] |
0 | 203 } |
204 | |
205 | |
206 proc urllog_isnumber {uarg} { | |
28 | 207 set ufoo 1 |
0 | 208 |
28 | 209 foreach i [split $uarg {}] { |
210 if {![string match \[0-9\] $i]} {set ufoo 0} | |
211 } | |
0 | 212 |
28 | 213 return $ufoo |
0 | 214 } |
215 | |
216 | |
217 proc urllog_msg {apublic anick achan amsg} { | |
28 | 218 global urllog_preferredmsg |
0 | 219 |
28 | 220 if {$apublic == 1} { |
221 putserv "$urllog_preferredmsg $achan :$amsg" | |
222 } else { | |
223 putserv "$urllog_preferredmsg $anick :$amsg" | |
224 } | |
0 | 225 } |
226 | |
227 | |
228 proc urllog_verb_msg {anick achan amsg} { | |
28 | 229 global urllog_verbose |
0 | 230 |
28 | 231 if {$urllog_verbose != 0} { |
232 urllog_msg 1 $anick $achan $amsg | |
233 } | |
0 | 234 } |
235 | |
236 | |
237 proc urllog_convert_ent {udata} { | |
28 | 238 global urllog_html_ent |
239 regsub -all " " $udata " " utmp | |
240 regsub -all "\r" $udata " " utmp | |
241 regsub -all "\n" $utmp " " utmp | |
242 regsub -all " *" $utmp " " utmp | |
243 regsub -all "\t" $utmp "" utmp | |
244 return [string map -nocase $urllog_html_ent $utmp] | |
0 | 245 } |
246 | |
247 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
248 proc urllog_escape { str } { |
28 | 249 return [string map {' ''} $str] |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
250 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
251 |
0 | 252 #------------------------------------------------------------------------- |
253 proc urllog_get_short {utime} { | |
28 | 254 global urllog_shorturl urllog_shorturl_prefix urllog_shorturl |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
255 |
28 | 256 set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" |
257 set ulen [string length $ustr] | |
0 | 258 |
28 | 259 set u1 [expr $utime / ($ulen * $ulen)] |
260 set utmp [expr $utime % ($ulen * $ulen)] | |
261 set u2 [expr $utmp / $ulen] | |
262 set u3 [expr $utmp % $ulen] | |
0 | 263 |
28 | 264 return "\[ $urllog_shorturl_prefix[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3] \]" |
0 | 265 } |
266 | |
267 | |
268 #------------------------------------------------------------------------- | |
269 proc urllog_chop_url {url} { | |
28 | 270 global urllog_shorturl_orig |
271 if {[string length $url] > $urllog_shorturl_orig} { | |
272 return "[string range $url 0 $urllog_shorturl_orig]..." | |
273 } else { | |
274 return $url | |
275 } | |
0 | 276 } |
277 | |
278 #------------------------------------------------------------------------- | |
279 proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} { | |
28 | 280 global urldb urlmsg_alreadyknown urllog_shorturl |
0 | 281 |
28 | 282 ### Let's check if we already know the URL |
283 set tmpURL [urllog_escape $urlStr] | |
284 urldb eval "SELECT id AS urlID, utime AS utime, url AS uurl, user AS uuser, host AS uhost, chan AS uchan FROM urls WHERE url='$tmpURL'" { | |
285 urllog_log "URL said by $urlNick ($urlStr) already known" | |
286 if {$urllog_shorturl != 0} { | |
287 set qstr "[urllog_get_short $urlID] " | |
288 } else { | |
289 set qstr "" | |
290 } | |
291 append qstr "($uuser/$uchan@[urllog_ctime $utime])" | |
292 if {[string length $urlTitle] > 0} { | |
293 set qstr "$urlmsg_alreadyknown - '$urlTitle' $qstr" | |
294 } else { | |
295 set qstr "$urlmsg_alreadyknown $qstr" | |
296 } | |
297 urllog_verb_msg $urlNick $urlChan $qstr | |
298 return 0 | |
299 } | |
0 | 300 |
18
1e2232135354
More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents:
13
diff
changeset
|
301 |
28 | 302 ### OK, the URL was not already known - thus we add it |
303 set sql "INSERT INTO urls (utime,url,user,host,chan) VALUES ([unixtime], '[urllog_escape $urlStr]', '[urllog_escape $urlNick]', '[urllog_escape $urlHost]', '[urllog_escape $urlChan]')" | |
304 if {[catch {urldb eval $sql} uerrmsg]} { | |
305 urllog_log "$uerrmsg on SQL:\n$sql" | |
306 return 0 | |
307 } | |
308 set urlID [urldb last_insert_rowid] | |
309 urllog_log "Added URL ($urlNick@$urlChan): $urlStr" | |
0 | 310 |
311 | |
28 | 312 ### Let's say something, to confirm that everything went well. |
313 if {$urllog_shorturl != 0} { | |
314 set qstr "[urllog_get_short $urlID] " | |
315 } else { | |
316 set qstr "" | |
317 } | |
318 if {[string length $urlTitle] > 0} { | |
319 urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr" | |
320 } else { | |
321 urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr" | |
322 } | |
0 | 323 |
28 | 324 return 1 |
0 | 325 } |
326 | |
327 | |
328 #------------------------------------------------------------------------- | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
329 proc urllog_http_handler {utoken utotal ucurr} { |
28 | 330 upvar #0 $utoken state |
0 | 331 |
28 | 332 # Stop fetching data after 3000 bytes, this should be enough to |
333 # contain the head section of a HTML page. | |
334 if {$ucurr > 64000} { | |
335 set state(status) "ok" | |
336 } | |
0 | 337 } |
338 | |
339 #------------------------------------------------------------------------- | |
340 proc urllog_checkurl {urlStr urlNick urlHost urlChan} { | |
28 | 341 global urllog_tlds urllog_check urlmsg_nosuchhost urlmsg_ioerror |
342 global urlmsg_timeout urlmsg_errorgettingdoc urllog_httprep | |
343 global urllog_shorturl_prefix urllog_shorturl urllog_encoding | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
344 |
28 | 345 ### Print status to bot's log |
346 urllog_log "$urlStr ($urlNick@$urlChan)" | |
0 | 347 |
28 | 348 ### Try to determine the URL protocol component (if it is missing) |
349 set u_checktld 1 | |
350 if {[string match "*www.*" $urlStr] && ![string match "http://*" $urlStr] && ![string match "https://*" $urlStr]} { | |
351 set urlStr "http://$urlStr" | |
352 } elseif {[string match "*ftp.*" $urlStr] && ![string match "ftp://*" $urlStr]} { | |
353 set urlStr "ftp://$urlStr" | |
354 } | |
0 | 355 |
28 | 356 if {[regexp "(ftp|http|https)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr u_match u_prefix ni1 ni2 ni3 ni4]} { |
357 # Check if the IP is on local network | |
358 if {($ni1 == 127) || ($ni1 == 10) || ($ni1 == 192 && $ni2 == 168) || ($ni1 == 0)} { | |
359 urllog_log "URL pointing to local or invalid network, ignored ($urlStr)." | |
360 return 0 | |
361 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
362 |
28 | 363 # Skip TLD check for URLs with IP address |
364 set u_checktld 0 | |
365 } | |
0 | 366 |
28 | 367 if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} { |
368 urllog_log "Ignoring ShortURL." | |
369 return 0 | |
370 } | |
0 | 371 |
28 | 372 ### Check the PORT (if the ":" is there) |
373 set u_record [split $urlStr "/"] | |
374 set u_hostname [lindex $u_record 2] | |
375 set u_port [lindex [split $u_hostname ":"] end] | |
0 | 376 |
28 | 377 if {![urllog_isnumber $u_port] && $u_port != "" && $u_port != $u_hostname} { |
378 urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $u_port" | |
379 return 0 | |
380 } | |
0 | 381 |
28 | 382 # Default to port 80 (HTTP) |
383 if {![urllog_isnumber $u_port]} { | |
384 set u_port 80 | |
385 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
386 |
28 | 387 ### Is it a http or ftp url? (FIX ME!) |
388 if {[string range $urlStr 0 3] != "http" && [string range $urlStr 0 2] != "ftp"} { | |
389 urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED TYPE (not HTTP or FTP)" | |
390 return 0 | |
391 } | |
0 | 392 |
28 | 393 ### Check the Top Level Domain (TLD) validity |
394 if {$u_checktld != 0} { | |
395 set u_sane [lindex [split $u_hostname "."] end] | |
396 set u_tld [lindex [split $u_sane ":"] 0] | |
397 set u_found 0 | |
0 | 398 |
28 | 399 if {[string length $u_tld] == 2} { |
400 # Assume all 2-letter domains to be valid :) | |
401 set u_found 1 | |
402 } else { | |
403 # Check our list of known TLDs | |
404 foreach itld $urllog_tlds { | |
405 if {[string match $itld $u_tld]} { | |
406 set u_found 1 | |
407 } | |
408 } | |
409 } | |
0 | 410 |
28 | 411 if {$u_found == 0} { |
412 urllog_log "Broken URL from $urlNick: ($urlStr) illegal TLD: $u_tld." | |
413 return 0 | |
414 } | |
415 } | |
0 | 416 |
28 | 417 set urlStr [string map $urllog_httprep $urlStr] |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
418 |
0 | 419 |
28 | 420 ### Do we perform additional optional checks? |
421 if {$urllog_check == 0 || [string range $urlStr 0 4] != "http:"} { | |
422 # No optional checks, just add the URL | |
423 urllog_addurl $urlStr $urlNick $urlHost $urlChan "" | |
424 return 1 | |
425 } | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
426 |
28 | 427 ### Does the document pointed by the URL exist? |
428 if {[catch {set utoken [::http::geturl $urlStr -progress urllog_http_handler -blocksize 1024 -timeout 3000]} uerrmsg]} { | |
429 urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)" | |
430 urllog_log "HTTP request failed: $uerrmsg" | |
431 return 0 | |
432 } | |
0 | 433 |
28 | 434 if {[::http::status $utoken] == "timeout"} { |
435 urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout" | |
436 urllog_log "HTTP request timed out ($urlStr)" | |
437 return 0 | |
438 } | |
0 | 439 |
28 | 440 if {[::http::status $utoken] != "ok"} { |
441 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::error $utoken])" | |
442 urllog_log "Error in HTTP transaction: [::http::error $utoken] ($urlStr)" | |
443 return 0 | |
444 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
445 |
28 | 446 # Fixme! Handle redirects! |
447 set ucode [::http::ncode $utoken] | |
448 if {$ucode >= 200 && $ucode <= 309} { | |
449 set udata [::http::data $utoken] | |
450 set umatches [regexp -nocase -inline -- "<meta.\*\?content=\".\*\?charset=(\[^\"\]*)\"/>" $udata] | |
451 set uconvert 0 | |
452 if {[llength $umatches] > 0} { | |
453 set uencoding [lindex $umatches 1] | |
454 if {[string length $uencoding] > 3} { | |
455 set uconvert 1 | |
456 } | |
457 } | |
0 | 458 |
28 | 459 set umatches [regexp -nocase -inline -- "<title>(.\*\?)</title>" $udata] |
460 if {[llength $umatches] > 0} { | |
461 set urlTitle [lindex $umatches 1] | |
462 if {$uconvert != 0} { | |
463 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} { | |
464 urllog_log "Error in charset conversion: $cerrmsg" | |
465 } | |
466 } | |
467 set urlTitle [urllog_convert_ent $urlTitle] | |
468 regsub -all "(^ *| *$)" $urlTitle "" urlTitle | |
469 } else { | |
470 set urlTitle "" | |
471 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
472 |
28 | 473 # Rasiatube hack |
474 if {[string match "*/rasiatube/view*" $urlStr]} { | |
475 set rasia 0 | |
476 set umatches [regexp -nocase -inline -- "<link rel=\"video_src\"\.\*\?file=(http://\[^&\]+)&" $udata] | |
477 if {[llength $umatches] > 0} { | |
478 set urlStr [lindex $umatches 1] | |
479 regsub -all "\/v\/" $urlStr "\/watch\?v=" urlStr | |
480 set rasia 1 | |
481 } else { | |
482 set umatches [regexp -nocase -inline -- "SWFObject.\"(\[^\"\]+)\", *\"flashvideo" $udata] | |
483 if {[llength $umatches] > 0} { | |
484 set urlStr [lindex $umatches 1] | |
485 regsub "http:\/\/www.dailymotion.com\/swf\/" $urlStr "http:\/\/www.dailymotion.com\/video\/" urlStr | |
486 set rasia 1 | |
487 } | |
488 } | |
489 | |
490 if {$rasia != 0} { | |
491 urllog_log "RasiaTube mangler: $urlStr" | |
492 urllog_verb_msg $urlNick $urlChan "Korjataan haiseva rasiatube-linkki: $urlStr" | |
493 } | |
494 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
495 |
28 | 496 urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle |
497 return 1 | |
498 } else { | |
499 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::code $utoken])" | |
500 urllog_log "[::http::code $utoken] - $urlStr" | |
501 } | |
0 | 502 |
28 | 503 ::http::cleanup $utoken |
0 | 504 } |
505 | |
506 | |
507 #------------------------------------------------------------------------- | |
508 proc urllog_checkmsg {nick uhost hand chan text} { | |
28 | 509 ### Check the nick |
510 if {$nick == "*"} { | |
511 urllog_log "urllog_checkmsg: nick was wc, this should not happen." | |
512 return 0 | |
513 } | |
0 | 514 |
28 | 515 ### Do the URL checking |
516 foreach istr [split $text " "] { | |
517 if {[regexp "(ftp|http|https)://|www\..+|ftp\..*" $istr]} { | |
518 urllog_checkurl $istr $nick $uhost $chan | |
519 } | |
520 } | |
0 | 521 |
28 | 522 return 0 |
0 | 523 } |
524 | |
525 | |
526 #------------------------------------------------------------------------- | |
527 ### Parse arguments, find and show the results | |
528 proc urllog_find {unick uhand uchan utext upublic} { | |
28 | 529 global urllog_name urllog_version urllog_shorturl urldb |
530 global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch | |
0 | 531 |
28 | 532 if {$upublic == 0} { |
533 set ulimit 5 | |
534 } else { | |
535 set ulimit 3 | |
536 } | |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
537 |
28 | 538 ### Parse the given command |
539 urllog_log "$unick/$uhand searched URL: $utext" | |
0 | 540 |
28 | 541 set ftokens [split $utext " "] |
542 set fpatlist "" | |
543 foreach ftoken $ftokens { | |
544 set fprefix [string range $ftoken 0 0] | |
545 set fpattern [string range $ftoken 1 end] | |
0 | 546 |
28 | 547 if {$fprefix == "-"} { |
548 lappend fpatlist "url NOT LIKE '%[urllog_escape $fpattern]%'" | |
549 } elseif {$fprefix == "%"} { | |
550 lappend fpatlist "user='[urllog_escape $fpattern]'" | |
551 } elseif {$fprefix == "@"} { | |
552 # foo | |
553 } else { | |
554 lappend fpatlist "url LIKE '%[urllog_escape $ftoken]%'" | |
555 } | |
556 } | |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
557 |
27
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
558 if {[llength $fpatlist] > 0} { |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
559 set fquery "WHERE [join $fpatlist " AND "]" |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
560 } else { |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
561 set fquery "" |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
562 } |
28 | 563 set iresults 0 |
564 set query "SELECT id AS urlID, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls $fquery ORDER BY utime DESC LIMIT $ulimit" | |
565 urldb eval $query { | |
566 incr iresults | |
567 set shortURL $uurl | |
568 if {$urllog_shorturl != 0 && $urlID != ""} { | |
569 set shortURL "$shortURL [urllog_get_short $urlID]" | |
570 } | |
571 urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[urllog_ctime $utime])" | |
572 } | |
573 | |
574 if {$iresults == 0} { | |
575 # If no URLs were found | |
576 urllog_msg $upublic $unick $uchan $urlmsg_nomatch | |
577 } | |
0 | 578 |
28 | 579 return 0 |
0 | 580 } |
581 | |
582 | |
583 #------------------------------------------------------------------------- | |
584 ### Finding binded functions | |
585 proc urllog_pub_urlfind {unick uhost uhand uchan utext} { | |
28 | 586 urllog_find $unick $uhand $uchan $utext 1 |
587 return 0 | |
0 | 588 } |
589 | |
590 | |
591 proc urllog_msg_urlfind {unick uhost uhand utext} { | |
28 | 592 urllog_find $unick $uhand "" $utext 0 |
593 return 0 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
594 } |
0 | 595 |
596 | |
597 # end of script |