Mercurial > hg > egg-tcls
annotate urllog.tcl @ 70:15fc72bc3f3e
More cosmetics.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sun, 11 Sep 2011 17:48:23 +0300 |
parents | 3762c621d1c3 |
children | 646b2fd67312 |
rev | line source |
---|---|
0 | 1 ########################################################################## |
2 # | |
49 | 3 # URLLog v2.0.1 by ccr/TNSP <ccr@tnsp.org> |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
4 # (C) Copyright 2000-2011 Tecnic Software productions (TNSP) |
0 | 5 # |
6 ########################################################################## | |
7 # | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
8 # URL-logger script for EggDrop IRC robot, utilizing SQLite3 database |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
9 # |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
10 # NOTICE! If you are upgrading to v2.0+ from any 1.x version, you |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
11 # may want to run a conversion script against your URL-database file, |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
12 # if you wish to preserve the old data. |
0 | 13 # |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
14 # See convert_urllog_db.tcl for more information. |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
15 # |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
16 # If you are making a fresh install, you will need to create the |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
17 # initial SQLite3 database with the required table schemas. You |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
18 # can do that by running: create_urllog_db.tcl |
0 | 19 # |
20 ########################################################################## | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
21 |
0 | 22 ### |
23 ### HTTP options | |
24 ### | |
25 # Set to 1 if you want to use proxy | |
26 set http_proxy 0 | |
27 | |
28 # Proxy host and port number (only used if enabled above) | |
29 set http_proxy_host "" | |
30 set http_proxy_port 8080 | |
31 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
32 # 1 = Enable experimental TLS/SSL support. This may not work. |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
33 set http_tls_support 0 |
0 | 34 |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
35 |
0 | 36 ### |
37 ### General options | |
38 ### | |
39 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
40 # Filename of the SQLite URL database file |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
41 set urllog_db_file "urllog.sqlite" |
0 | 42 |
43 | |
44 # 1 = Verbose: Say messages when URL is OK, bad, etc. | |
45 # 0 = Quiet : Be quiet (only speak if asked with !urlfind, etc) | |
46 set urllog_verbose 1 | |
47 | |
48 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
49 # 1 = Enable logging of various script actions into bot's log |
0 | 50 # 0 = Don't. |
51 set urllog_logmsg 1 | |
52 | |
53 | |
54 # 1 = Check URLs for validity and existence before adding. | |
55 # 0 = No checks. Add _anything_ that looks like a URL to the database. | |
56 set urllog_check 1 | |
57 | |
58 | |
59 ### | |
60 ### Search related settings | |
61 ### | |
62 | |
63 # 0 = No search-commands available | |
64 # 1 = Search enabled | |
65 set urllog_search 1 | |
66 | |
67 | |
68 # How many URLs should the !urlfind command show (maximum limit) | |
69 set urllog_showmax_pub 3 | |
70 | |
71 | |
72 # For private-search, this is the default limit (user can change it) | |
73 set urllog_showmax_priv 6 | |
74 | |
75 | |
76 ### | |
77 ### ShortURL-settings | |
78 ### | |
79 | |
80 # 1 = Use ShortURLs | |
81 # 0 = Don't. | |
82 set urllog_shorturl 1 | |
83 | |
84 # Max length of original URL to be shown | |
85 set urllog_shorturl_orig 30 | |
86 | |
87 # Path to PHP/CGI-script that redirects ShortURLs | |
88 set urllog_shorturl_prefix "http://tnsp.org/u/" | |
89 | |
90 | |
91 ### | |
92 ### Message-texts | |
93 ### | |
94 | |
95 # No such host was found | |
96 set urlmsg_nosuchhost "ei tommosta oo!" | |
97 | |
98 # Could not connect host (I/O errors etc) | |
99 set urlmsg_ioerror "kraak, virhe yhdynnässä." | |
100 | |
101 # HTTP timeout | |
102 set urlmsg_timeout "ei jaksa ootella" | |
103 | |
104 # No such document was found | |
105 set urlmsg_errorgettingdoc "siitosvirhe" | |
106 | |
107 # URL was already known (was in database) | |
108 set urlmsg_alreadyknown "wanha!" | |
109 #set urlmsg_alreadyknown "Empiiristen havaintojen perusteella ja tällä sovellutusalueella esiintyneisiin aikaisempiin kontekstuaalisiin ilmaisuihin viitaten uskallan todeta, että sovellukseen ilmoittamasi tietoverkko-osoite oli kronologisti ajatellen varsin postpresentuaalisesti sopimaton ja ennestään hyvin tunnettu." | |
110 | |
111 # No match was found when searched with !urlfind or other command | |
112 set urlmsg_nomatch "Ei osumia." | |
113 | |
114 | |
115 ### | |
116 ### Things that you usually don't need to touch ... | |
117 ### | |
118 | |
119 # What IRC "command" should we use to send messages: | |
120 # (Valid alternatives are "PRIVMSG" and "NOTICE") | |
121 set urllog_preferredmsg "PRIVMSG" | |
122 | |
123 # The valid known Top Level Domains (TLDs), but not the country code TLDs | |
124 # (Now includes the new IANA published TLDs) | |
125 set urllog_tlds "org,com,net,mil,gov,biz,edu,coop,aero,info,museum,name,pro,int" | |
126 | |
127 | |
128 ########################################################################## | |
129 # No need to look below this line | |
130 ########################################################################## | |
131 set urllog_name "URLLog" | |
49 | 132 set urllog_version "2.0.1" |
0 | 133 |
134 set urllog_tlds [split $urllog_tlds ","] | |
135 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] | |
136 | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
137 set urllog_html_ent [split "‏||—|-|‪||‬||‎||å|å|Å|Å|é|é|:|:|ä|ä|ö|ö|ä|ä|ö|ö| | |-|-|”|\"|“|\"|»|>>|"|\"|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>|ä|ä|ö|ö|Ä|Ä" "|"] |
0 | 138 |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
139 ### Require packages |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
140 package require sqlite3 |
0 | 141 package require http |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
142 |
0 | 143 ### Binding initializations |
144 if {$urllog_search != 0} { | |
28 | 145 bind pub - !urlfind urllog_pub_urlfind |
146 bind msg - urlfind urllog_msg_urlfind | |
0 | 147 } |
148 | |
149 bind pubm - *.* urllog_checkmsg | |
150 bind topc - *.* urllog_checkmsg | |
151 | |
152 | |
153 ### Initialization messages | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
154 set urllog_message "$urllog_name v$urllog_version (C) 2000-2011 ccr/TNSP" |
0 | 155 putlog "$urllog_message" |
156 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
157 ### HTTP module initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
158 ::http::config -useragent "$urllog_name/$urllog_version" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
159 if {$http_proxy != 0} { |
28 | 160 ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
161 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
162 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
163 if {$http_tls_support != 0} { |
28 | 164 package require tls |
165 ::http::register https 443 [list ::tls::socket -request 1 -require 1 -cadir "/etc/certs/"] | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
166 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
167 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
168 ### SQLite database initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
169 if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} { |
28 | 170 putlog " Could not open SQLite3 database '$urllog_db_file': $uerrmsg" |
171 exit 2 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
172 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
173 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
174 |
0 | 175 if {$http_proxy != 0} { |
28 | 176 putlog " (Using proxy $http_proxy_host:$http_proxy_port)" |
0 | 177 } |
178 | |
179 if {$urllog_check != 0} { | |
28 | 180 putlog " (Additional URL validity checks enabled)" |
0 | 181 } |
182 | |
183 if {$urllog_verbose != 0} { | |
28 | 184 putlog " (Verbose mode enabled)" |
0 | 185 } |
186 | |
187 if {$urllog_search != 0} { | |
28 | 188 putlog " (Search commands enabled)" |
0 | 189 } |
190 | |
191 #------------------------------------------------------------------------- | |
192 ### Utility functions | |
# Write a message to the bot's log, prefixed with the script name.
# Does nothing when logging is switched off (urllog_logmsg is 0).
#   arg - message text to log
proc urllog_log {arg} {
    global urllog_logmsg urllog_name

    # Logging disabled: bail out early.
    if {$urllog_logmsg == 0} {
        return
    }

    putlog "$urllog_name: $arg"
}
200 | |
201 | |
# Format a UNIX timestamp as "DD.MM.YYYY HH:MM".
# An empty value or "*" is treated as timestamp 0.
#   utime - seconds since the epoch
proc urllog_ctime { utime } {
    switch -exact -- $utime {
        "" -
        "*" { set utime 0 }
    }

    set stamp [clock format $utime -format "%d.%m.%Y %H:%M"]
    return $stamp
}
210 | |
211 | |
# Test whether a string is a non-negative decimal integer.
#   uarg - string to test
# Returns 1 when uarg consists of one or more ASCII digits 0-9, else 0.
# The empty string is NOT a number: a missing port must not pass as one.
proc urllog_isnumber {uarg} {
    return [regexp {^[0-9]+$} $uarg]
}
220 | |
221 | |
# Send a message via the IRC command configured in urllog_preferredmsg.
#   apublic - 1 = send to the channel, anything else = send to the nick
#   anick   - nick to address when not public
#   achan   - channel to address when public
#   amsg    - message text
proc urllog_msg {apublic anick achan amsg} {
    global urllog_preferredmsg

    # Private by default; switch to the channel for public replies.
    set target $anick
    if {$apublic == 1} {
        set target $achan
    }

    putserv "$urllog_preferredmsg $target :$amsg"
}
231 | |
232 | |
# Say something on the channel, but only in verbose mode.
#   anick - nick that triggered the message
#   achan - channel to speak on
#   amsg  - message text
proc urllog_verb_msg {anick achan amsg} {
    global urllog_verbose

    # Quiet mode: stay silent.
    if {$urllog_verbose == 0} {
        return
    }

    urllog_msg 1 $anick $achan $amsg
}
240 | |
241 | |
# Normalise whitespace in a fetched title and translate HTML entities.
#   udata - raw text (e.g. contents of a <title> element)
# Returns the text with CR/LF turned into spaces, tabs removed, runs of
# spaces collapsed to one, and the urllog_html_ent mapping applied
# case-insensitively.
proc urllog_convert_ent {udata} {
    global urllog_html_ent

    # Do the line-break and tab replacements in one pass so that no
    # intermediate result is thrown away.
    set utmp [string map [list "\r" " " "\n" " " "\t" ""] $udata]

    # Collapse runs of spaces. The pattern needs at least one space:
    # a pattern that can match the empty string would insert a space
    # between every character.
    regsub -all { +} $utmp " " utmp

    return [string map -nocase $urllog_html_ent $utmp]
}
251 | |
252 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
# Escape a string for use inside a single-quoted SQL literal by
# doubling every single quote.
#   str - raw string
proc urllog_escape { str } {
    # Splitting on the quote and re-joining with two quotes doubles
    # every occurrence.
    set pieces [split $str "'"]
    return [join $pieces "''"]
}
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
256 |
0 | 257 #------------------------------------------------------------------------- |
# Build the bracketed ShortURL for a numeric URL id.
#   utime - numeric id of the URL (callers pass the database row id)
# Returns "[ <prefix><3 chars> ]", where the three characters are the
# id written in base 62 using the alphabet below.
proc urllog_get_short {utime} {
    global urllog_shorturl_prefix

    # NOTE(review): the alphabet has "N" before "M" ("...KLNMOP...").
    # Presumably the redirect script decodes with the same table, so it
    # is kept as-is -- confirm before "fixing" the order.
    set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
    set ulen [string length $ustr]

    # Braced expressions: avoids double substitution of $utime and lets
    # Tcl byte-compile the arithmetic.
    set usquare [expr {$ulen * $ulen}]
    set u1 [expr {$utime / $usquare}]
    set utmp [expr {$utime % $usquare}]
    set u2 [expr {$utmp / $ulen}]
    set u3 [expr {$utmp % $ulen}]

    # Ids of ulen^3 and above do not fit in three characters; string
    # index then yields an empty first character.
    set ucode "[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3]"
    return "\[ ${urllog_shorturl_prefix}${ucode} \]"
}
271 | |
272 | |
273 #------------------------------------------------------------------------- | |
# Shorten a URL for display.
#   url - the URL to show
# Returns the URL unchanged when it is at most urllog_shorturl_orig
# characters long; otherwise its first urllog_shorturl_orig characters
# followed by "...".
proc urllog_chop_url {url} {
    global urllog_shorturl_orig

    if {[string length $url] <= $urllog_shorturl_orig} {
        return $url
    }

    # string range is inclusive at both ends, so the last index is
    # max-1 to keep exactly max characters.
    set ulast [expr {$urllog_shorturl_orig - 1}]
    return "[string range $url 0 $ulast]..."
}
283 | |
284 #------------------------------------------------------------------------- | |
# Store a URL in the database, or report that it is already known.
#   urlStr   - the URL
#   urlNick  - nick who said it
#   urlHost  - host of that nick
#   urlChan  - channel where it was said
#   urlTitle - page title, or "" when none was found
# Returns 1 when the URL was inserted; 0 when it was already known or
# the INSERT failed.
proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} {
    global urldb urlmsg_alreadyknown urllog_shorturl

    set hasTitle [expr {[string length $urlTitle] > 0}]

    ### Known URL? The first matching row is reported and we return.
    set escURL [urllog_escape $urlStr]
    urldb eval "SELECT id AS urlID, utime AS utime, url AS uurl, user AS uuser, host AS uhost, chan AS uchan FROM urls WHERE url='$escURL'" {
        urllog_log "URL said by $urlNick ($urlStr) already known"

        set origin ""
        if {$urllog_shorturl != 0} {
            set origin "[urllog_get_short $urlID] "
        }
        append origin "($uuser/$uchan@[urllog_ctime $utime])"

        if {$hasTitle} {
            urllog_verb_msg $urlNick $urlChan "$urlmsg_alreadyknown - '$urlTitle' $origin"
        } else {
            urllog_verb_msg $urlNick $urlChan "$urlmsg_alreadyknown $origin"
        }
        return 0
    }

    ### Not known yet, so insert it.
    set sql "INSERT INTO urls (utime,url,user,host,chan) VALUES ([unixtime], '[urllog_escape $urlStr]', '[urllog_escape $urlNick]', '[urllog_escape $urlHost]', '[urllog_escape $urlChan]')"
    if {[catch {urldb eval $sql} uerrmsg]} {
        urllog_log "$uerrmsg on SQL:\n$sql"
        return 0
    }
    set urlID [urldb last_insert_rowid]
    urllog_log "Added URL ($urlNick@$urlChan): $urlStr"

    ### Confirm on the channel (only spoken in verbose mode).
    set suffix ""
    if {$urllog_shorturl != 0} {
        set suffix "[urllog_get_short $urlID] "
    }

    set shown [urllog_chop_url $urlStr]
    if {$hasTitle} {
        set shown "'$urlTitle' ($shown)"
    }
    urllog_verb_msg $urlNick $urlChan "$shown $suffix"

    return 1
}
332 | |
333 | |
334 #------------------------------------------------------------------------- | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
# Progress callback for ::http::geturl.
#   utoken - name of the http state array
#   utotal - expected total size (unused here)
#   ucurr  - bytes received so far
# Once more than 64000 bytes have arrived, the transfer's status is set
# to "ok" in the state array; that much data should be enough to
# contain the head section of an HTML page.
proc urllog_http_handler {utoken utotal ucurr} {
    upvar #0 $utoken state

    # Still under the limit: leave the transfer alone.
    if {$ucurr <= 64000} {
        return
    }

    set state(status) "ok"
}
344 | |
345 #------------------------------------------------------------------------- | |
# Validate a URL seen on IRC and, if acceptable, add it to the database.
#   urlStr  - candidate URL (the protocol may be missing)
#   urlNick - nick who said it
#   urlHost - host of that nick
#   urlChan - channel where it was said
# Returns 1 when the URL was handed to urllog_addurl, 0 when it was
# rejected or could not be fetched.
#
# Steps: prepend a missing protocol, reject private/invalid IP
# addresses and our own ShortURLs, check port / protocol / TLD, and --
# when urllog_check is on and the URL is plain http -- fetch the
# document to confirm it exists and to extract its <title>.
proc urllog_checkurl {urlStr urlNick urlHost urlChan} {
    global urllog_tlds urllog_check urlmsg_ioerror
    global urlmsg_timeout urlmsg_errorgettingdoc urllog_httprep
    global urllog_shorturl_prefix urllog_shorturl

    ### Print status to bot's log
    urllog_log "$urlStr ($urlNick@$urlChan)"

    ### Try to determine the URL protocol component (if it is missing)
    set u_checktld 1
    if {[string match "*www.*" $urlStr] && ![string match "http://*" $urlStr] && ![string match "https://*" $urlStr]} {
        set urlStr "http://$urlStr"
    } elseif {[string match "*ftp.*" $urlStr] && ![string match "ftp://*" $urlStr]} {
        set urlStr "ftp://$urlStr"
    }

    if {[regexp {(ftp|http|https)://([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})} $urlStr u_match u_prefix ni1 ni2 ni3 ni4]} {
        # Parse the octets as decimal so that leading zeros ("010",
        # "08") are not treated as octal or as plain strings.
        scan "$ni1 $ni2" "%d %d" n1 n2

        # Check if the IP is on a local, private or invalid network:
        # 0/8, 10/8, 127/8, 169.254/16, 172.16/12, 192.168/16.
        if {($n1 == 127) || ($n1 == 10) || ($n1 == 0)
            || ($n1 == 192 && $n2 == 168)
            || ($n1 == 172 && $n2 >= 16 && $n2 <= 31)
            || ($n1 == 169 && $n2 == 254)} {
            urllog_log "URL pointing to local or invalid network, ignored ($urlStr)."
            return 0
        }

        # Skip TLD check for URLs with IP address
        set u_checktld 0
    }

    # Never log our own ShortURLs.
    if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} {
        urllog_log "Ignoring ShortURL."
        return 0
    }

    ### Check the PORT (if the ":" is there)
    set u_record [split $urlStr "/"]
    set u_hostname [lindex $u_record 2]
    set u_port [lindex [split $u_hostname ":"] end]

    # Without a ":" the "port" equals the hostname, which is fine.
    if {![urllog_isnumber $u_port] && $u_port != "" && $u_port != $u_hostname} {
        urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $u_port"
        return 0
    }

    # Default to port 80 (HTTP)
    if {![urllog_isnumber $u_port]} {
        set u_port 80
    }

    ### Is it a http or ftp url? (FIX ME!)
    if {[string range $urlStr 0 3] != "http" && [string range $urlStr 0 2] != "ftp"} {
        urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED TYPE (not HTTP or FTP)"
        return 0
    }

    ### Check the Top Level Domain (TLD) validity
    if {$u_checktld != 0} {
        set u_sane [lindex [split $u_hostname "."] end]
        set u_tld [lindex [split $u_sane ":"] 0]
        set u_found 0

        if {[string length $u_tld] == 2} {
            # Assume all 2-letter domains to be valid :)
            set u_found 1
        } else {
            # Check our list of known TLDs
            foreach itld $urllog_tlds {
                if {[string match $itld $u_tld]} {
                    set u_found 1
                }
            }
        }

        if {$u_found == 0} {
            urllog_log "Broken URL from $urlNick: ($urlStr) illegal TLD: $u_tld."
            return 0
        }
    }

    # Percent-encode characters listed in urllog_httprep.
    set urlStr [string map $urllog_httprep $urlStr]

    ### Do we perform additional optional checks?
    if {$urllog_check == 0 || [string range $urlStr 0 4] != "http:"} {
        # No optional checks, just add the URL
        urllog_addurl $urlStr $urlNick $urlHost $urlChan ""
        return 1
    }

    ### Does the document pointed by the URL exist?
    if {[catch {set utoken [::http::geturl $urlStr -progress urllog_http_handler -blocksize 1024 -timeout 3000]} uerrmsg]} {
        urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)"
        urllog_log "HTTP request failed: $uerrmsg"
        return 0
    }

    # Copy everything we need out of the token, then release it right
    # away so that no return path below can leak the state array.
    set ustatus [::http::status $utoken]
    set uerror [::http::error $utoken]
    set ucode [::http::ncode $utoken]
    set ucodestr [::http::code $utoken]
    set udata [::http::data $utoken]
    ::http::cleanup $utoken

    if {$ustatus == "timeout"} {
        urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout"
        urllog_log "HTTP request timed out ($urlStr)"
        return 0
    }

    if {$ustatus != "ok"} {
        urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($uerror)"
        urllog_log "Error in HTTP transaction: $uerror ($urlStr)"
        return 0
    }

    # Fixme! Handle redirects!
    if {$ucode >= 200 && $ucode <= 309} {
        # Look for a charset declared in a <meta ... content="..."> tag.
        set umatches [regexp -nocase -inline -- "<meta.\*\?content=\".\*\?charset=(\[^\"\]*)\"/>" $udata]
        set uconvert 0
        if {[llength $umatches] > 0} {
            set uencoding [lindex $umatches 1]
            if {[string length $uencoding] > 3} {
                set uconvert 1
            }
        }

        # Extract and clean up the page title, if there is one.
        set umatches [regexp -nocase -inline -- "<title>(.\*\?)</title>" $udata]
        if {[llength $umatches] > 0} {
            set urlTitle [lindex $umatches 1]
            if {$uconvert != 0} {
                # Best effort: on failure the unconverted title is kept.
                if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} {
                    urllog_log "Error in charset conversion: $cerrmsg"
                }
            }
            set urlTitle [urllog_convert_ent $urlTitle]
            regsub -all "(^ *| *$)" $urlTitle "" urlTitle
        } else {
            set urlTitle ""
        }

        # Rasiatube hack: replace the wrapper page URL with the URL of
        # the embedded video.
        if {[string match "*/rasiatube/view*" $urlStr]} {
            set rasia 0
            set umatches [regexp -nocase -inline -- "<link rel=\"video_src\"\.\*\?file=(http://\[^&\]+)&" $udata]
            if {[llength $umatches] > 0} {
                set urlStr [lindex $umatches 1]
                regsub -all "\/v\/" $urlStr "\/watch\?v=" urlStr
                set rasia 1
            } else {
                set umatches [regexp -nocase -inline -- "SWFObject.\"(\[^\"\]+)\", *\"flashvideo" $udata]
                if {[llength $umatches] > 0} {
                    set urlStr [lindex $umatches 1]
                    regsub "http:\/\/www.dailymotion.com\/swf\/" $urlStr "http:\/\/www.dailymotion.com\/video\/" urlStr
                    set rasia 1
                }
            }

            if {$rasia != 0} {
                urllog_log "RasiaTube mangler: $urlStr"
                urllog_verb_msg $urlNick $urlChan "Korjataan haiseva rasiatube-linkki: $urlStr"
            }
        }

        urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle
        return 1
    }

    # Any other HTTP response code: report it and reject the URL.
    urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($ucodestr)"
    urllog_log "$ucodestr - $urlStr"
    return 0
}
511 | |
512 | |
513 #------------------------------------------------------------------------- | |
# Bind handler: scan a line of text for things that look like URLs and
# pass each one to urllog_checkurl.
#   nick  - nick of the speaker
#   uhost - user@host of the speaker
#   hand  - bot handle of the speaker (unused)
#   chan  - channel
#   text  - the text that was said
# Always returns 0.
proc urllog_checkmsg {nick uhost hand chan text} {
    ### Check the nick
    if {$nick == "*"} {
        urllog_log "urllog_checkmsg: nick was wc, this should not happen."
        return 0
    }

    ### Do the URL checking
    # The pattern is braced so that "\." reaches the regexp engine as a
    # literal dot. Inside double quotes Tcl strips the backslash and the
    # dot matches any character (so e.g. "softpaws" matched "ftp..*").
    foreach istr [split $text " "] {
        if {[regexp {(ftp|http|https)://|www\..+|ftp\..*} $istr]} {
            urllog_checkurl $istr $nick $uhost $chan
        }
    }

    return 0
}
530 | |
531 | |
532 #------------------------------------------------------------------------- | |
533 ### Parse arguments, find and show the results | |
# Parse a search request, query the URL database and send the results.
#   unick   - nick of the requester
#   uhand   - bot handle of the requester (used for logging only)
#   uchan   - channel to answer on (public searches)
#   utext   - search arguments, separated by spaces:
#               word   URL must contain "word"
#               -word  URL must NOT contain "word"
#               %nick  URL must have been added by "nick"
#               @...   accepted but currently ignored
#   upublic - 1 = answer on the channel, 0 = answer privately
# The number of results is capped by urllog_showmax_pub (public) or
# urllog_showmax_priv (private). Always returns 0.
proc urllog_find {unick uhand uchan utext upublic} {
    global urllog_shorturl urldb
    global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch

    # Use the configured limit; fall back to the old built-in value when
    # the setting is missing or is not a positive integer.
    if {$upublic == 0} {
        set ulimit 5
        set ucfgvar urllog_showmax_priv
    } else {
        set ulimit 3
        set ucfgvar urllog_showmax_pub
    }
    if {[info exists $ucfgvar]} {
        set ucfg [set $ucfgvar]
        if {[string is integer -strict $ucfg] && $ucfg > 0} {
            set ulimit $ucfg
        }
    }

    ### Parse the given command
    urllog_log "$unick/$uhand searched URL: $utext"

    set fpatlist {}
    foreach ftoken [split $utext " "] {
        # Repeated spaces give empty tokens; they carry no condition.
        if {$ftoken eq ""} {
            continue
        }

        set fprefix [string range $ftoken 0 0]
        set fpattern [string range $ftoken 1 end]

        if {$fprefix == "-"} {
            lappend fpatlist "url NOT LIKE '%[urllog_escape $fpattern]%'"
        } elseif {$fprefix == "%"} {
            lappend fpatlist "user='[urllog_escape $fpattern]'"
        } elseif {$fprefix == "@"} {
            # Reserved prefix, not implemented: the token is ignored.
        } else {
            lappend fpatlist "url LIKE '%[urllog_escape $ftoken]%'"
        }
    }

    if {[llength $fpatlist] > 0} {
        set fquery "WHERE [join $fpatlist " AND "]"
    } else {
        set fquery ""
    }

    ### Run the query, newest first, and report each row
    set iresults 0
    set usql "SELECT id AS urlID, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls $fquery ORDER BY utime DESC LIMIT $ulimit"
    urldb eval $usql {
        incr iresults
        set shortURL $uurl
        if {$urllog_shorturl != 0 && $urlID != ""} {
            set shortURL "$shortURL [urllog_get_short $urlID]"
        }
        urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[urllog_ctime $utime])"
    }

    if {$iresults == 0} {
        # If no URLs were found
        urllog_msg $upublic $unick $uchan $urlmsg_nomatch
    }

    return 0
}
588 | |
589 | |
590 #------------------------------------------------------------------------- | |
591 ### Bound handlers for the search commands | |
# Channel handler for "!urlfind": run a public search.
#   unick - nick of the requester
#   uhost - user@host of the requester (unused)
#   uhand - bot handle of the requester
#   uchan - channel the command was given on
#   utext - search arguments
# Always returns 0.
proc urllog_pub_urlfind {unick uhost uhand uchan utext} {
    # Results go to the channel.
    set isPublic 1
    urllog_find $unick $uhand $uchan $utext $isPublic

    return 0
}
596 | |
597 | |
# Private-message handler for "urlfind": run a private search.
#   unick - nick of the requester
#   uhost - user@host of the requester (unused)
#   uhand - bot handle of the requester
#   utext - search arguments
# Always returns 0.
proc urllog_msg_urlfind {unick uhost uhand utext} {
    # No channel is involved; results go straight to the nick.
    set noChannel ""
    set isPublic 0
    urllog_find $unick $uhand $noChannel $utext $isPublic

    return 0
}
0 | 602 |
603 # end of script |