Mercurial > hg > egg-tcls
annotate urllog.tcl @ 62:6428b1bcb34b
urllog: Remove some global variable references where they are not used.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Thu, 08 Sep 2011 22:23:33 +0300 |
parents | f69363fc1f61 |
children | 31c8c4f50aa6 |
rev | line source |
---|---|
0 | 1 ########################################################################## |
2 # | |
49 | 3 # URLLog v2.0.1 by ccr/TNSP <ccr@tnsp.org> |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
4 # (C) Copyright 2000-2011 Tecnic Software productions (TNSP) |
0 | 5 # |
6 ########################################################################## | |
7 # | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
8 # URL-logger script for EggDrop IRC robot, utilizing SQLite3 database |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
9 # |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
10 # NOTICE! If you are upgrading to v2.0+ from any 1.x version, you |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
11 # may want to run a conversion script against your URL-database file, |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
12 # if you wish to preserve the old data. |
0 | 13 # |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
14 # See convert_urllog_db.tcl for more information. |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
15 # |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
16 # If you are making a fresh install, you will need to create the |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
17 # initial SQLite3 database with the required table schemas. You |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
18 # can do that by running: create_urllog_db.tcl |
0 | 19 # |
20 ########################################################################## | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
21 |
0 | 22 ### |
23 ### HTTP options | |
24 ### | |
25 # Set to 1 if you want to use proxy | |
26 set http_proxy 0 | |
27 | |
28 # Proxy host and port number (only used if enabled above) | |
29 set http_proxy_host "" | |
30 set http_proxy_port 8080 | |
31 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
32 # 1 = Enable experimental TLS/SSL support. This may not work. |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
33 set http_tls_support 0 |
0 | 34 |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
35 |
0 | 36 ### |
37 ### General options | |
38 ### | |
39 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
40 # Filename of the SQLite URL database file |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
41 set urllog_db_file "urllog.sqlite" |
0 | 42 |
43 | |
44 # 1 = Verbose: Say messages when URL is OK, bad, etc. | |
45 # 0 = Quiet : Be quiet (only speak if asked with !urlfind, etc) | |
46 set urllog_verbose 1 | |
47 | |
48 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
49 # 1 = Enable logging of various script actions into bot's log |
0 | 50 # 0 = Don't. |
51 set urllog_logmsg 1 | |
52 | |
53 | |
54 # 1 = Check URLs for validity and existence before adding. | |
55 # 0 = No checks. Add _anything_ that looks like an URL to the database. | |
56 set urllog_check 1 | |
57 | |
58 | |
59 ### | |
60 ### Search related settings | |
61 ### | |
62 | |
63 # 0 = No search-commands available | |
64 # 1 = Search enabled | |
65 set urllog_search 1 | |
66 | |
67 | |
68 # How many URL's should the !urlfind command show (maximum limit) | |
69 set urllog_showmax_pub 3 | |
70 | |
71 | |
72 # For private-search, this is the default limit (user can change it) | |
73 set urllog_showmax_priv 6 | |
74 | |
75 | |
76 ### | |
77 ### ShortURL-settings | |
78 ### | |
79 | |
80 # 1 = Use ShortURLs | |
81 # 0 = Don't. | |
82 set urllog_shorturl 1 | |
83 | |
84 # Max length of original URL to be shown | |
85 set urllog_shorturl_orig 30 | |
86 | |
87 # Path to PHP/CGI-script that redirects ShortURLs | |
88 set urllog_shorturl_prefix "http://tnsp.org/u/" | |
89 | |
90 | |
91 ### | |
92 ### Message-texts | |
93 ### | |
94 | |
95 # No such host was found | |
96 set urlmsg_nosuchhost "ei tommosta oo!" | |
97 | |
98 # Could not connect host (I/O errors etc) | |
99 set urlmsg_ioerror "kraak, virhe yhdynnässä." | |
100 | |
101 # HTTP timeout | |
102 set urlmsg_timeout "ei jaksa ootella" | |
103 | |
104 # No such document was found | |
105 set urlmsg_errorgettingdoc "siitosvirhe" | |
106 | |
107 # URL was already known (was in database) | |
108 set urlmsg_alreadyknown "wanha!" | |
109 #set urlmsg_alreadyknown "Empiiristen havaintojen perusteella ja tällä sovellutusalueella esiintyneisiin aikaisempiin kontekstuaalisiin ilmaisuihin viitaten uskallan todeta, että sovellukseen ilmoittamasi tietoverkko-osoite oli kronologisti ajatellen varsin postpresentuaalisesti sopimaton ja ennestään hyvin tunnettu." | |
110 | |
111 # No match was found when searched with !urlfind or other command | |
112 set urlmsg_nomatch "Ei osumia." | |
113 | |
114 | |
115 ### | |
116 ### Things that you usually don't need to touch ... | |
117 ### | |
118 | |
119 # What IRC "command" should we use to send messages: | |
120 # (Valid alternatives are "PRIVMSG" and "NOTICE") | |
121 set urllog_preferredmsg "PRIVMSG" | |
122 | |
123 # The valid known Top Level Domains (TLDs), but not the country code TLDs | |
124 # (Now includes the new IANA published TLDs) | |
125 set urllog_tlds "org,com,net,mil,gov,biz,edu,coop,aero,info,museum,name,pro,int" | |
126 | |
127 | |
128 ########################################################################## | |
129 # No need to look below this line | |
130 ########################################################################## | |
131 #------------------------------------------------------------------------- | |
132 set urllog_name "URLLog" | |
49 | 133 set urllog_version "2.0.1" |
0 | 134 |
135 set urllog_tlds [split $urllog_tlds ","] | |
136 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] | |
137 | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
138 set urllog_html_ent [split "‏||—|-|‪||‬||‎||å|å|Å|Å|é|é|:|:|ä|ä|ö|ö|ä|ä|ö|ö| | |-|-|”|\"|“|\"|»|>>|"|\"|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>|ä|ä|ö|ö|Ä|Ä" "|"] |
0 | 139 |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
140 ### Require packages |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
141 package require sqlite3 |
0 | 142 package require http |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
143 |
0 | 144 ### Binding initializations |
145 if {$urllog_search != 0} { | |
28 | 146 bind pub - !urlfind urllog_pub_urlfind |
147 bind msg - urlfind urllog_msg_urlfind | |
0 | 148 } |
149 | |
150 bind pubm - *.* urllog_checkmsg | |
151 bind topc - *.* urllog_checkmsg | |
152 | |
153 | |
154 ### Initialization messages | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
155 set urllog_message "$urllog_name v$urllog_version (C) 2000-2011 ccr/TNSP" |
0 | 156 putlog "$urllog_message" |
157 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
158 ### HTTP module initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
159 ::http::config -useragent "$urllog_name/$urllog_version" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
160 if {$http_proxy != 0} { |
28 | 161 ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
162 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
163 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
164 if {$http_tls_support != 0} { |
28 | 165 package require tls |
166 ::http::register https 443 [list ::tls::socket -request 1 -require 1 -cadir "/etc/certs/"] | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
167 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
168 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
169 ### SQLite database initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
170 if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} { |
28 | 171 putlog " Could not open SQLite3 database '$urllog_db_file': $uerrmsg" |
172 exit 2 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
173 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
174 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
175 |
0 | 176 if {$http_proxy != 0} { |
28 | 177 putlog " (Using proxy $http_proxy_host:$http_proxy_port)" |
0 | 178 } |
179 | |
180 if {$urllog_check != 0} { | |
28 | 181 putlog " (Additional URL validity checks enabled)" |
0 | 182 } |
183 | |
184 if {$urllog_verbose != 0} { | |
28 | 185 putlog " (Verbose mode enabled)" |
0 | 186 } |
187 | |
188 if {$urllog_search != 0} { | |
28 | 189 putlog " (Search commands enabled)" |
0 | 190 } |
191 | |
192 #------------------------------------------------------------------------- | |
193 ### Utility functions | |
194 proc urllog_log {arg} { | |
28 | 195 global urllog_logmsg urllog_name |
0 | 196 |
28 | 197 if {$urllog_logmsg != 0} { |
198 putlog "$urllog_name: $arg" | |
199 } | |
0 | 200 } |
201 | |
202 | |
203 proc urllog_ctime { utime } { | |
204 | |
28 | 205 if {$utime == "" || $utime == "*"} { |
206 set utime 0 | |
207 } | |
0 | 208 |
28 | 209 return [clock format $utime -format "%d.%m.%Y %H:%M"] |
0 | 210 } |
211 | |
212 | |
213 proc urllog_isnumber {uarg} { | |
28 | 214 set ufoo 1 |
0 | 215 |
28 | 216 foreach i [split $uarg {}] { |
217 if {![string match \[0-9\] $i]} {set ufoo 0} | |
218 } | |
0 | 219 |
28 | 220 return $ufoo |
0 | 221 } |
222 | |
223 | |
224 proc urllog_msg {apublic anick achan amsg} { | |
28 | 225 global urllog_preferredmsg |
0 | 226 |
28 | 227 if {$apublic == 1} { |
228 putserv "$urllog_preferredmsg $achan :$amsg" | |
229 } else { | |
230 putserv "$urllog_preferredmsg $anick :$amsg" | |
231 } | |
0 | 232 } |
233 | |
234 | |
235 proc urllog_verb_msg {anick achan amsg} { | |
28 | 236 global urllog_verbose |
0 | 237 |
28 | 238 if {$urllog_verbose != 0} { |
239 urllog_msg 1 $anick $achan $amsg | |
240 } | |
0 | 241 } |
242 | |
243 | |
244 proc urllog_convert_ent {udata} { | |
28 | 245 global urllog_html_ent |
246 regsub -all " " $udata " " utmp | |
247 regsub -all "\r" $udata " " utmp | |
248 regsub -all "\n" $utmp " " utmp | |
249 regsub -all " *" $utmp " " utmp | |
250 regsub -all "\t" $utmp "" utmp | |
251 return [string map -nocase $urllog_html_ent $utmp] | |
0 | 252 } |
253 | |
254 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
255 proc urllog_escape { str } { |
28 | 256 return [string map {' ''} $str] |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
257 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
258 |
0 | 259 #------------------------------------------------------------------------- |
260 proc urllog_get_short {utime} { | |
28 | 261 global urllog_shorturl urllog_shorturl_prefix urllog_shorturl |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
262 |
28 | 263 set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" |
264 set ulen [string length $ustr] | |
0 | 265 |
28 | 266 set u1 [expr $utime / ($ulen * $ulen)] |
267 set utmp [expr $utime % ($ulen * $ulen)] | |
268 set u2 [expr $utmp / $ulen] | |
269 set u3 [expr $utmp % $ulen] | |
0 | 270 |
28 | 271 return "\[ $urllog_shorturl_prefix[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3] \]" |
0 | 272 } |
273 | |
274 | |
275 #------------------------------------------------------------------------- | |
276 proc urllog_chop_url {url} { | |
28 | 277 global urllog_shorturl_orig |
278 if {[string length $url] > $urllog_shorturl_orig} { | |
279 return "[string range $url 0 $urllog_shorturl_orig]..." | |
280 } else { | |
281 return $url | |
282 } | |
0 | 283 } |
284 | |
285 #------------------------------------------------------------------------- | |
286 proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} { | |
28 | 287 global urldb urlmsg_alreadyknown urllog_shorturl |
0 | 288 |
28 | 289 ### Let's check if we already know the URL |
290 set tmpURL [urllog_escape $urlStr] | |
291 urldb eval "SELECT id AS urlID, utime AS utime, url AS uurl, user AS uuser, host AS uhost, chan AS uchan FROM urls WHERE url='$tmpURL'" { | |
292 urllog_log "URL said by $urlNick ($urlStr) already known" | |
293 if {$urllog_shorturl != 0} { | |
294 set qstr "[urllog_get_short $urlID] " | |
295 } else { | |
296 set qstr "" | |
297 } | |
298 append qstr "($uuser/$uchan@[urllog_ctime $utime])" | |
299 if {[string length $urlTitle] > 0} { | |
300 set qstr "$urlmsg_alreadyknown - '$urlTitle' $qstr" | |
301 } else { | |
302 set qstr "$urlmsg_alreadyknown $qstr" | |
303 } | |
304 urllog_verb_msg $urlNick $urlChan $qstr | |
305 return 0 | |
306 } | |
0 | 307 |
18
1e2232135354
More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents:
13
diff
changeset
|
308 |
28 | 309 ### OK, the URL was not already known - thus we add it |
310 set sql "INSERT INTO urls (utime,url,user,host,chan) VALUES ([unixtime], '[urllog_escape $urlStr]', '[urllog_escape $urlNick]', '[urllog_escape $urlHost]', '[urllog_escape $urlChan]')" | |
311 if {[catch {urldb eval $sql} uerrmsg]} { | |
312 urllog_log "$uerrmsg on SQL:\n$sql" | |
313 return 0 | |
314 } | |
315 set urlID [urldb last_insert_rowid] | |
316 urllog_log "Added URL ($urlNick@$urlChan): $urlStr" | |
0 | 317 |
318 | |
28 | 319 ### Let's say something, to confirm that everything went well. |
320 if {$urllog_shorturl != 0} { | |
321 set qstr "[urllog_get_short $urlID] " | |
322 } else { | |
323 set qstr "" | |
324 } | |
325 if {[string length $urlTitle] > 0} { | |
326 urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr" | |
327 } else { | |
328 urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr" | |
329 } | |
0 | 330 |
28 | 331 return 1 |
0 | 332 } |
333 | |
334 | |
335 #------------------------------------------------------------------------- | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
336 proc urllog_http_handler {utoken utotal ucurr} { |
28 | 337 upvar #0 $utoken state |
0 | 338 |
28 | 339 # Stop fetching data after 3000 bytes, this should be enough to |
340 # contain the head section of a HTML page. | |
341 if {$ucurr > 64000} { | |
342 set state(status) "ok" | |
343 } | |
0 | 344 } |
345 | |
346 #------------------------------------------------------------------------- | |
347 proc urllog_checkurl {urlStr urlNick urlHost urlChan} { | |
28 | 348 global urllog_tlds urllog_check urlmsg_nosuchhost urlmsg_ioerror |
349 global urlmsg_timeout urlmsg_errorgettingdoc urllog_httprep | |
350 global urllog_shorturl_prefix urllog_shorturl urllog_encoding | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
351 |
28 | 352 ### Print status to bot's log |
353 urllog_log "$urlStr ($urlNick@$urlChan)" | |
0 | 354 |
28 | 355 ### Try to determine the URL protocol component (if it is missing) |
356 set u_checktld 1 | |
357 if {[string match "*www.*" $urlStr] && ![string match "http://*" $urlStr] && ![string match "https://*" $urlStr]} { | |
358 set urlStr "http://$urlStr" | |
359 } elseif {[string match "*ftp.*" $urlStr] && ![string match "ftp://*" $urlStr]} { | |
360 set urlStr "ftp://$urlStr" | |
361 } | |
0 | 362 |
28 | 363 if {[regexp "(ftp|http|https)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr u_match u_prefix ni1 ni2 ni3 ni4]} { |
364 # Check if the IP is on local network | |
365 if {($ni1 == 127) || ($ni1 == 10) || ($ni1 == 192 && $ni2 == 168) || ($ni1 == 0)} { | |
366 urllog_log "URL pointing to local or invalid network, ignored ($urlStr)." | |
367 return 0 | |
368 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
369 |
28 | 370 # Skip TLD check for URLs with IP address |
371 set u_checktld 0 | |
372 } | |
0 | 373 |
28 | 374 if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} { |
375 urllog_log "Ignoring ShortURL." | |
376 return 0 | |
377 } | |
0 | 378 |
28 | 379 ### Check the PORT (if the ":" is there) |
380 set u_record [split $urlStr "/"] | |
381 set u_hostname [lindex $u_record 2] | |
382 set u_port [lindex [split $u_hostname ":"] end] | |
0 | 383 |
28 | 384 if {![urllog_isnumber $u_port] && $u_port != "" && $u_port != $u_hostname} { |
385 urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $u_port" | |
386 return 0 | |
387 } | |
0 | 388 |
28 | 389 # Default to port 80 (HTTP) |
390 if {![urllog_isnumber $u_port]} { | |
391 set u_port 80 | |
392 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
393 |
28 | 394 ### Is it a http or ftp url? (FIX ME!) |
395 if {[string range $urlStr 0 3] != "http" && [string range $urlStr 0 2] != "ftp"} { | |
396 urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED TYPE (not HTTP or FTP)" | |
397 return 0 | |
398 } | |
0 | 399 |
28 | 400 ### Check the Top Level Domain (TLD) validity |
401 if {$u_checktld != 0} { | |
402 set u_sane [lindex [split $u_hostname "."] end] | |
403 set u_tld [lindex [split $u_sane ":"] 0] | |
404 set u_found 0 | |
0 | 405 |
28 | 406 if {[string length $u_tld] == 2} { |
407 # Assume all 2-letter domains to be valid :) | |
408 set u_found 1 | |
409 } else { | |
410 # Check our list of known TLDs | |
411 foreach itld $urllog_tlds { | |
412 if {[string match $itld $u_tld]} { | |
413 set u_found 1 | |
414 } | |
415 } | |
416 } | |
0 | 417 |
28 | 418 if {$u_found == 0} { |
419 urllog_log "Broken URL from $urlNick: ($urlStr) illegal TLD: $u_tld." | |
420 return 0 | |
421 } | |
422 } | |
0 | 423 |
28 | 424 set urlStr [string map $urllog_httprep $urlStr] |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
425 |
0 | 426 |
28 | 427 ### Do we perform additional optional checks? |
428 if {$urllog_check == 0 || [string range $urlStr 0 4] != "http:"} { | |
429 # No optional checks, just add the URL | |
430 urllog_addurl $urlStr $urlNick $urlHost $urlChan "" | |
431 return 1 | |
432 } | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
433 |
28 | 434 ### Does the document pointed by the URL exist? |
435 if {[catch {set utoken [::http::geturl $urlStr -progress urllog_http_handler -blocksize 1024 -timeout 3000]} uerrmsg]} { | |
436 urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)" | |
437 urllog_log "HTTP request failed: $uerrmsg" | |
438 return 0 | |
439 } | |
0 | 440 |
28 | 441 if {[::http::status $utoken] == "timeout"} { |
442 urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout" | |
443 urllog_log "HTTP request timed out ($urlStr)" | |
444 return 0 | |
445 } | |
0 | 446 |
28 | 447 if {[::http::status $utoken] != "ok"} { |
448 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::error $utoken])" | |
449 urllog_log "Error in HTTP transaction: [::http::error $utoken] ($urlStr)" | |
450 return 0 | |
451 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
452 |
28 | 453 # Fixme! Handle redirects! |
454 set ucode [::http::ncode $utoken] | |
455 if {$ucode >= 200 && $ucode <= 309} { | |
456 set udata [::http::data $utoken] | |
457 set umatches [regexp -nocase -inline -- "<meta.\*\?content=\".\*\?charset=(\[^\"\]*)\"/>" $udata] | |
458 set uconvert 0 | |
459 if {[llength $umatches] > 0} { | |
460 set uencoding [lindex $umatches 1] | |
461 if {[string length $uencoding] > 3} { | |
462 set uconvert 1 | |
463 } | |
464 } | |
0 | 465 |
28 | 466 set umatches [regexp -nocase -inline -- "<title>(.\*\?)</title>" $udata] |
467 if {[llength $umatches] > 0} { | |
468 set urlTitle [lindex $umatches 1] | |
469 if {$uconvert != 0} { | |
470 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} { | |
471 urllog_log "Error in charset conversion: $cerrmsg" | |
472 } | |
473 } | |
474 set urlTitle [urllog_convert_ent $urlTitle] | |
475 regsub -all "(^ *| *$)" $urlTitle "" urlTitle | |
476 } else { | |
477 set urlTitle "" | |
478 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
479 |
28 | 480 # Rasiatube hack |
481 if {[string match "*/rasiatube/view*" $urlStr]} { | |
482 set rasia 0 | |
483 set umatches [regexp -nocase -inline -- "<link rel=\"video_src\"\.\*\?file=(http://\[^&\]+)&" $udata] | |
484 if {[llength $umatches] > 0} { | |
485 set urlStr [lindex $umatches 1] | |
486 regsub -all "\/v\/" $urlStr "\/watch\?v=" urlStr | |
487 set rasia 1 | |
488 } else { | |
489 set umatches [regexp -nocase -inline -- "SWFObject.\"(\[^\"\]+)\", *\"flashvideo" $udata] | |
490 if {[llength $umatches] > 0} { | |
491 set urlStr [lindex $umatches 1] | |
492 regsub "http:\/\/www.dailymotion.com\/swf\/" $urlStr "http:\/\/www.dailymotion.com\/video\/" urlStr | |
493 set rasia 1 | |
494 } | |
495 } | |
496 | |
497 if {$rasia != 0} { | |
498 urllog_log "RasiaTube mangler: $urlStr" | |
499 urllog_verb_msg $urlNick $urlChan "Korjataan haiseva rasiatube-linkki: $urlStr" | |
500 } | |
501 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
502 |
28 | 503 urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle |
504 return 1 | |
505 } else { | |
506 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::code $utoken])" | |
507 urllog_log "[::http::code $utoken] - $urlStr" | |
508 } | |
0 | 509 |
28 | 510 ::http::cleanup $utoken |
0 | 511 } |
512 | |
513 | |
514 #------------------------------------------------------------------------- | |
515 proc urllog_checkmsg {nick uhost hand chan text} { | |
28 | 516 ### Check the nick |
517 if {$nick == "*"} { | |
518 urllog_log "urllog_checkmsg: nick was wc, this should not happen." | |
519 return 0 | |
520 } | |
0 | 521 |
28 | 522 ### Do the URL checking |
523 foreach istr [split $text " "] { | |
524 if {[regexp "(ftp|http|https)://|www\..+|ftp\..*" $istr]} { | |
525 urllog_checkurl $istr $nick $uhost $chan | |
526 } | |
527 } | |
0 | 528 |
28 | 529 return 0 |
0 | 530 } |
531 | |
532 | |
533 #------------------------------------------------------------------------- | |
534 ### Parse arguments, find and show the results | |
535 proc urllog_find {unick uhand uchan utext upublic} { | |
62
6428b1bcb34b
urllog: Remove some global variable references where they are not used.
Matti Hamalainen <ccr@tnsp.org>
parents:
50
diff
changeset
|
536 global urllog_shorturl urldb |
28 | 537 global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch |
0 | 538 |
28 | 539 if {$upublic == 0} { |
540 set ulimit 5 | |
541 } else { | |
542 set ulimit 3 | |
543 } | |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
544 |
28 | 545 ### Parse the given command |
546 urllog_log "$unick/$uhand searched URL: $utext" | |
0 | 547 |
28 | 548 set ftokens [split $utext " "] |
549 set fpatlist "" | |
550 foreach ftoken $ftokens { | |
551 set fprefix [string range $ftoken 0 0] | |
552 set fpattern [string range $ftoken 1 end] | |
0 | 553 |
28 | 554 if {$fprefix == "-"} { |
555 lappend fpatlist "url NOT LIKE '%[urllog_escape $fpattern]%'" | |
556 } elseif {$fprefix == "%"} { | |
557 lappend fpatlist "user='[urllog_escape $fpattern]'" | |
558 } elseif {$fprefix == "@"} { | |
559 # foo | |
560 } else { | |
561 lappend fpatlist "url LIKE '%[urllog_escape $ftoken]%'" | |
562 } | |
563 } | |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
564 |
27
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
565 if {[llength $fpatlist] > 0} { |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
566 set fquery "WHERE [join $fpatlist " AND "]" |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
567 } else { |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
568 set fquery "" |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
569 } |
28 | 570 set iresults 0 |
571 set query "SELECT id AS urlID, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls $fquery ORDER BY utime DESC LIMIT $ulimit" | |
572 urldb eval $query { | |
573 incr iresults | |
574 set shortURL $uurl | |
575 if {$urllog_shorturl != 0 && $urlID != ""} { | |
576 set shortURL "$shortURL [urllog_get_short $urlID]" | |
577 } | |
578 urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[urllog_ctime $utime])" | |
579 } | |
580 | |
581 if {$iresults == 0} { | |
582 # If no URLs were found | |
583 urllog_msg $upublic $unick $uchan $urlmsg_nomatch | |
584 } | |
0 | 585 |
28 | 586 return 0 |
0 | 587 } |
588 | |
589 | |
590 #------------------------------------------------------------------------- | |
591 ### Finding binded functions | |
592 proc urllog_pub_urlfind {unick uhost uhand uchan utext} { | |
28 | 593 urllog_find $unick $uhand $uchan $utext 1 |
594 return 0 | |
0 | 595 } |
596 | |
597 | |
598 proc urllog_msg_urlfind {unick uhost uhand utext} { | |
28 | 599 urllog_find $unick $uhand "" $utext 0 |
600 return 0 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
601 } |
0 | 602 |
603 | |
604 # end of script |