Mercurial > hg > egg-tcls
annotate urllog.tcl @ 82:1bbc79f41a1c
urllog: Rename few variables for clarity.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sun, 11 Sep 2011 19:37:40 +0300 |
parents | 17e542b7985a |
children | f171a9fb7b7b |
rev | line source |
---|---|
0 | 1 ########################################################################## |
2 # | |
49 | 3 # URLLog v2.0.1 by ccr/TNSP <ccr@tnsp.org> |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
4 # (C) Copyright 2000-2011 Tecnic Software productions (TNSP) |
0 | 5 # |
6 ########################################################################## | |
7 # | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
8 # URL-logger script for EggDrop IRC robot, utilizing SQLite3 database |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
9 # This script requires SQLite TCL extension. Under Debian, you need: |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
10 # tcl8.5 libsqlite3-tcl (and eggdrop eggdrop-data, of course) |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
11 # |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
12 # NOTICE! If you are upgrading to URLLog v2.0+ from any 1.x version, you |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
13 # may want to run a conversion script against your URL-database file, |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
14 # if you wish to preserve the old data. |
0 | 15 # |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
16 # See convert_urllog_db.tcl for more information. |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
17 # |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
18 # If you are doing a fresh install, you will need to create the |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
19 # initial SQLite3 database with the required table schemas. You |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
20 # can do that by running: create_urllog_db.tcl |
0 | 21 # |
22 ########################################################################## | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
23 |
0 | 24 ### |
25 ### HTTP options | |
26 ### | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
27 # Set to 1 if you want to enable use of HTTP proxy. |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
28 # If you do, you MUST set the proxy settings below too. |
0 | 29 set http_proxy 0 |
30 | |
31 # Proxy host and port number (only used if enabled above) | |
32 set http_proxy_host "" | |
33 set http_proxy_port 8080 | |
34 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
35 # Enable _experimental_ TLS/SSL support. This may not work at all. |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
36 # If unsure, leave this option disabled (0). |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
37 set http_tls_support 0 |
0 | 38 |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
39 |
0 | 40 ### |
41 ### General options | |
42 ### | |
43 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
44 # Filename of the SQLite URL database file |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
45 set urllog_db_file "urllog.sqlite" |
0 | 46 |
47 | |
48 # 1 = Verbose: Say messages when URL is OK, bad, etc. | |
49 # 0 = Quiet : Be quiet (only speak if asked with !urlfind, etc) | |
50 set urllog_verbose 1 | |
51 | |
52 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
53 # 1 = Enable logging of various script actions into bot's log |
0 | 54 # 0 = Don't. |
55 set urllog_logmsg 1 | |
56 | |
57 | |
58 # 1 = Check URLs for validity and existence before adding. | |
59 # 0 = No checks. Add _anything_ that looks like an URL to the database. | |
60 set urllog_check 1 | |
61 | |
62 | |
63 ### | |
64 ### Search related settings | |
65 ### | |
66 | |
67 # 0 = No search-commands available | |
68 # 1 = Search enabled | |
69 set urllog_search 1 | |
70 | |
71 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
72 # Maximum number of URLs the "!urlfind" command shows per search on a channel. |
0 | 73 set urllog_showmax_pub 3 |
74 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
75 # Same as above, but for private message search. |
0 | 76 set urllog_showmax_priv 6 |
77 | |
78 | |
79 ### | |
80 ### ShortURL-settings | |
81 ### | |
82 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
83 # 1 = Enable showing of ShortURLs |
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
84 # 0 = ShortURLs not shown in any bot actions |
0 | 85 set urllog_shorturl 1 |
86 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
87 # Max length of original URL to be shown, rest is chopped |
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
88 # off if the URL is longer than the specified amount. |
0 | 89 set urllog_shorturl_orig 30 |
90 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
91 # Web server URL that handles redirects of ShortURLs |
0 | 92 set urllog_shorturl_prefix "http://tnsp.org/u/" |
93 | |
94 | |
95 ### | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
96 ### Message texts (informational, errors, etc.) |
0 | 97 ### |
98 | |
99 # No such host was found | |
100 set urlmsg_nosuchhost "ei tommosta oo!" | |
101 | |
102 # Could not connect host (I/O errors etc) | |
103 set urlmsg_ioerror "kraak, virhe yhdynnässä." | |
104 | |
105 # HTTP timeout | |
106 set urlmsg_timeout "ei jaksa ootella" | |
107 | |
108 # No such document was found | |
109 set urlmsg_errorgettingdoc "siitosvirhe" | |
110 | |
111 # URL was already known (was in database) | |
112 set urlmsg_alreadyknown "wanha!" | |
113 #set urlmsg_alreadyknown "Empiiristen havaintojen perusteella ja tällä sovellutusalueella esiintyneisiin aikaisempiin kontekstuaalisiin ilmaisuihin viitaten uskallan todeta, että sovellukseen ilmoittamasi tietoverkko-osoite oli kronologisti ajatellen varsin postpresentuaalisesti sopimaton ja ennestään hyvin tunnettu." | |
114 | |
115 # No match was found when searched with !urlfind or other command | |
116 set urlmsg_nomatch "Ei osumia." | |
117 | |
118 | |
119 ### | |
120 ### Things that you usually don't need to touch ... | |
121 ### | |
122 | |
123 # What IRC "command" should we use to send messages: | |
124 # (Valid alternatives are "PRIVMSG" and "NOTICE") | |
125 set urllog_preferredmsg "PRIVMSG" | |
126 | |
127 # The valid known Top Level Domains (TLDs), but not the country code TLDs | |
128 # (Now includes the new IANA published TLDs) | |
129 set urllog_tlds "org,com,net,mil,gov,biz,edu,coop,aero,info,museum,name,pro,int" | |
130 | |
131 | |
132 ########################################################################## | |
133 # No need to look below this line | |
134 ########################################################################## | |
135 set urllog_name "URLLog" | |
49 | 136 set urllog_version "2.0.1" |
0 | 137 |
138 set urllog_tlds [split $urllog_tlds ","] | |
139 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] | |
140 | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
141 set urllog_html_ent [split "‏||—|-|‪||‬||‎||å|å|Å|Å|é|é|:|:|ä|ä|ö|ö|ä|ä|ö|ö| | |-|-|”|\"|“|\"|»|>>|"|\"|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>|ä|ä|ö|ö|Ä|Ä" "|"] |
0 | 142 |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
143 ### Require packages |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
144 package require sqlite3 |
0 | 145 package require http |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
146 |
0 | 147 ### Binding initializations |
148 if {$urllog_search != 0} { | |
28 | 149 bind pub - !urlfind urllog_pub_urlfind |
150 bind msg - urlfind urllog_msg_urlfind | |
0 | 151 } |
152 | |
153 bind pubm - *.* urllog_checkmsg | |
154 bind topc - *.* urllog_checkmsg | |
155 | |
156 | |
157 ### Initialization messages | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
158 set urllog_message "$urllog_name v$urllog_version (C) 2000-2011 ccr/TNSP" |
0 | 159 putlog "$urllog_message" |
160 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
161 ### HTTP module initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
162 ::http::config -useragent "$urllog_name/$urllog_version" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
163 if {$http_proxy != 0} { |
28 | 164 ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
165 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
166 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
167 if {$http_tls_support != 0} { |
28 | 168 package require tls |
169 ::http::register https 443 [list ::tls::socket -request 1 -require 1 -cadir "/etc/certs/"] | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
170 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
171 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
172 ### SQLite database initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
173 if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} { |
28 | 174 putlog " Could not open SQLite3 database '$urllog_db_file': $uerrmsg" |
175 exit 2 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
176 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
177 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
178 |
0 | 179 if {$http_proxy != 0} { |
28 | 180 putlog " (Using proxy $http_proxy_host:$http_proxy_port)" |
0 | 181 } |
182 | |
183 if {$urllog_check != 0} { | |
28 | 184 putlog " (Additional URL validity checks enabled)" |
0 | 185 } |
186 | |
187 if {$urllog_verbose != 0} { | |
28 | 188 putlog " (Verbose mode enabled)" |
0 | 189 } |
190 | |
191 if {$urllog_search != 0} { | |
28 | 192 putlog " (Search commands enabled)" |
0 | 193 } |
194 | |
195 #------------------------------------------------------------------------- | |
196 ### Utility functions | |
# Write a message to the bot's log, prefixed with the script name.
# Nothing is logged when the urllog_logmsg setting is 0.
#   arg - message text
proc urllog_log {arg} {
    global urllog_logmsg urllog_name

    if {$urllog_logmsg == 0} {
        return
    }
    putlog "$urllog_name: $arg"
}
204 | |
205 | |
# Format a Unix timestamp as "DD.MM.YYYY HH:MM".
# An empty value or "*" is treated as timestamp 0.
#   utime - seconds since the epoch
proc urllog_ctime { utime } {
    switch -exact -- $utime {
        "" -
        "*" { set utime 0 }
    }
    return [clock format $utime -format "%d.%m.%Y %H:%M"]
}
214 | |
215 | |
# Return 1 when every character of uarg is an ASCII digit 0-9, else 0.
# An empty string has no offending characters and therefore yields 1.
proc urllog_isnumber {uarg} {
    return [regexp {^[0-9]*$} $uarg]
}
224 | |
225 | |
# Send a message using the IRC command configured in urllog_preferredmsg.
#   apublic - 1: send to the channel achan, anything else: send to anick
#   anick   - nick used as target for non-public replies
#   achan   - channel used as target for public replies
#   amsg    - message text
proc urllog_msg {apublic anick achan amsg} {
    global urllog_preferredmsg

    set target $anick
    if {$apublic == 1} {
        set target $achan
    }
    putserv "$urllog_preferredmsg $target :$amsg"
}
235 | |
236 | |
# Say something on the channel, but only when verbose mode
# (urllog_verbose) is enabled. The message is always sent publicly.
#   anick - nick that triggered the message
#   achan - channel to speak on
#   amsg  - message text
proc urllog_verb_msg {anick achan amsg} {
    global urllog_verbose

    if {$urllog_verbose == 0} {
        return
    }
    urllog_msg 1 $anick $achan $amsg
}
244 | |
245 | |
# Normalise whitespace in a piece of HTML text and translate the HTML
# entities listed in urllog_html_ent into plain characters.
#   udata - raw text (e.g. the contents of a <title> element)
# Returns the cleaned-up text.
#
# Steps, in order: CR and LF become spaces, runs of spaces collapse to a
# single space, tabs are removed, and finally the entity map is applied
# case-insensitively.
#
# Fixes compared to the previous version: each step now works on the
# result of the previous one (the second substitution used to restart
# from the unmodified input), and the collapse pattern requires at least
# one space so it can no longer match the empty string between characters.
proc urllog_convert_ent {udata} {
    global urllog_html_ent

    set utmp [string map [list "\r" " " "\n" " "] $udata]
    regsub -all { +} $utmp " " utmp
    set utmp [string map [list "\t" ""] $utmp]
    return [string map -nocase $urllog_html_ent $utmp]
}
255 | |
256 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
# Escape a string for use inside a single-quoted SQL literal by
# doubling every single quote character.
proc urllog_escape { str } {
    regsub -all {'} $str {''} escaped
    return $escaped
}
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
260 |
0 | 261 #------------------------------------------------------------------------- |
# Build the bracketed ShortURL text for a numeric identifier.
#   utime - non-negative integer (the callers in this file pass the
#           database row id of the URL)
# Returns "[ <urllog_shorturl_prefix><code> ]" where <code> is the
# identifier written as three base-62 digits.
#
# NOTE(review): the digit alphabet has "N" before "M". Presumably the
# redirect service behind urllog_shorturl_prefix decodes with the same
# table, so it is kept exactly as it was - verify before "fixing" it.
# Identifiers of 62*62*62 (238328) and above do not fit in three digits;
# the leading digit then falls outside the alphabet and comes out empty.
proc urllog_get_short {utime} {
    global urllog_shorturl_prefix

    set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
    set ulen [string length $ustr]

    # Braced expressions: evaluated once, without re-substituting values.
    set u1 [expr {$utime / ($ulen * $ulen)}]
    set utmp [expr {$utime % ($ulen * $ulen)}]
    set u2 [expr {$utmp / $ulen}]
    set u3 [expr {$utmp % $ulen}]

    return "\[ $urllog_shorturl_prefix[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3] \]"
}
275 | |
276 | |
277 #------------------------------------------------------------------------- | |
# Shorten a URL for display.
#   url - the URL to show
# Returns the URL unchanged when it is at most urllog_shorturl_orig
# characters long; otherwise its first urllog_shorturl_orig characters
# followed by "...".
#
# The end index is (limit - 1) because "string range" is inclusive;
# using the limit itself kept one character too many.
proc urllog_chop_url {url} {
    global urllog_shorturl_orig

    if {[string length $url] > $urllog_shorturl_orig} {
        return "[string range $url 0 [expr {$urllog_shorturl_orig - 1}]]..."
    }
    return $url
}
287 | |
288 #------------------------------------------------------------------------- | |
# Store a URL in the database, or report that it is already known.
#   urlStr   - the URL
#   urlNick  - nick of the person who said it
#   urlHost  - user@host of that person
#   urlChan  - channel where it was said
#   urlTitle - document title, or "" when none is available
# Returns 1 when the URL was added, 0 when it was already known or the
# INSERT failed.
#
# All values come straight from IRC, so they are handed to SQLite as
# bound parameters ($name inside the braced SQL) instead of being pasted
# into the statement text.
proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} {
    global urlmsg_alreadyknown urllog_shorturl

    ### Let's check if we already know the URL
    urldb eval {SELECT id AS urlID, utime AS utime, user AS uuser, chan AS uchan FROM urls WHERE url=$urlStr} {
        urllog_log "URL said by $urlNick ($urlStr) already known"
        if {$urllog_shorturl != 0} {
            set qstr "[urllog_get_short $urlID] "
        } else {
            set qstr ""
        }
        append qstr "($uuser/$uchan@[urllog_ctime $utime])"
        if {[string length $urlTitle] > 0} {
            set qstr "$urlmsg_alreadyknown - '$urlTitle' $qstr"
        } else {
            set qstr "$urlmsg_alreadyknown $qstr"
        }
        urllog_verb_msg $urlNick $urlChan $qstr
        # Only the first matching row is reported.
        return 0
    }

    ### OK, the URL was not already known - thus we add it
    set unow [unixtime]
    # CAST keeps the timestamp numeric even though it is bound from a Tcl string.
    if {[catch {urldb eval {INSERT INTO urls (utime,url,user,host,chan) VALUES (CAST($unow AS INTEGER), $urlStr, $urlNick, $urlHost, $urlChan)}} uerrmsg]} {
        urllog_log "$uerrmsg on SQL INSERT of URL: $urlStr"
        return 0
    }
    set uid [urldb last_insert_rowid]
    urllog_log "Added URL ($urlNick@$urlChan): $urlStr"

    ### Let's say something, to confirm that everything went well.
    if {$urllog_shorturl != 0} {
        set qstr "[urllog_get_short $uid] "
    } else {
        set qstr ""
    }
    if {[string length $urlTitle] > 0} {
        urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr"
    } else {
        urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr"
    }

    return 1
}
336 | |
337 | |
338 #------------------------------------------------------------------------- | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
# Progress callback passed to ::http::geturl via -progress.
#   utoken - name of the HTTP state array of the transfer
#   utotal - not used here
#   ucurr  - compared against the byte limit below
# Once ucurr exceeds 64000 the transfer status is forced to "ok" so
# that fetching stops; that much data should be enough to contain the
# head section of a HTML page.
proc urllog_http_handler {utoken utotal ucurr} {
    upvar #0 $utoken state

    if {$ucurr <= 64000} {
        return
    }
    set state(status) "ok"
}
348 | |
349 #------------------------------------------------------------------------- | |
# Validate a URL seen on IRC and, if acceptable, store it via urllog_addurl.
#   urlStr  - the word that looked like a URL
#   urlNick - nick of the person who said it
#   urlHost - user@host of that person
#   urlChan - channel where it was said
# Returns 1 when the URL was passed on to urllog_addurl, 0 when it was
# rejected or could not be fetched.
#
# Checks performed: a missing scheme is guessed from "www." / "ftp.",
# literal IP addresses on local networks are refused, our own ShortURLs
# are ignored, the port and top level domain must look sane. When
# urllog_check is enabled, "http:" URLs are also fetched so that the
# HTTP status can be verified and the page title extracted.
#
# Everything needed from the HTTP transaction is copied into local
# variables and the token is released immediately afterwards, so no
# return path leaks the token.
proc urllog_checkurl {urlStr urlNick urlHost urlChan} {
    global urllog_tlds urllog_check urlmsg_nosuchhost urlmsg_ioerror
    global urlmsg_timeout urlmsg_errorgettingdoc urllog_httprep
    global urllog_shorturl_prefix urllog_shorturl

    ### Print status to bot's log
    urllog_log "$urlStr ($urlNick@$urlChan)"

    ### Try to determine the URL protocol component (if it is missing)
    set u_checktld 1
    if {[string match "*www.*" $urlStr] && ![string match "http://*" $urlStr] && ![string match "https://*" $urlStr]} {
        set urlStr "http://$urlStr"
    } elseif {[string match "*ftp.*" $urlStr] && ![string match "ftp://*" $urlStr]} {
        set urlStr "ftp://$urlStr"
    }

    if {[regexp {(ftp|http|https)://([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})} $urlStr u_match u_prefix ni1 ni2 ni3 ni4]} {
        # Check if the IP is on a local network: loopback, the RFC 1918
        # private ranges (10/8, 172.16/12, 192.168/16), link-local
        # 169.254/16, or the invalid 0/8 block.
        if {($ni1 == 127) || ($ni1 == 10) || ($ni1 == 0)
            || ($ni1 == 192 && $ni2 == 168)
            || ($ni1 == 172 && $ni2 >= 16 && $ni2 <= 31)
            || ($ni1 == 169 && $ni2 == 254)} {
            urllog_log "URL pointing to local or invalid network, ignored ($urlStr)."
            return 0
        }

        # Skip TLD check for URLs with IP address
        set u_checktld 0
    }

    if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} {
        urllog_log "Ignoring ShortURL."
        return 0
    }

    ### Check the PORT (if the ":" is there)
    set u_record [split $urlStr "/"]
    set u_hostname [lindex $u_record 2]
    set u_port [lindex [split $u_hostname ":"] end]

    # Without a ":" the "port" is the whole host name, which is not an error.
    if {![urllog_isnumber $u_port] && $u_port != "" && $u_port != $u_hostname} {
        urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $u_port"
        return 0
    }

    # Default to port 80 (HTTP)
    if {![urllog_isnumber $u_port]} {
        set u_port 80
    }

    ### Is it a http or ftp url? (FIX ME!)
    if {[string range $urlStr 0 3] != "http" && [string range $urlStr 0 2] != "ftp"} {
        urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED TYPE (not HTTP or FTP)"
        return 0
    }

    ### Check the Top Level Domain (TLD) validity
    if {$u_checktld != 0} {
        set u_sane [lindex [split $u_hostname "."] end]
        set u_tld [lindex [split $u_sane ":"] 0]

        # All 2-letter domains are assumed to be valid country codes;
        # anything else must be on our list of known TLDs.
        if {[string length $u_tld] != 2 && [lsearch -exact $urllog_tlds $u_tld] < 0} {
            urllog_log "Broken URL from $urlNick: ($urlStr) illegal TLD: $u_tld."
            return 0
        }
    }

    set urlStr [string map $urllog_httprep $urlStr]

    ### Do we perform additional optional checks?
    if {$urllog_check == 0 || [string range $urlStr 0 4] != "http:"} {
        # No optional checks, just add the URL
        urllog_addurl $urlStr $urlNick $urlHost $urlChan ""
        return 1
    }

    ### Does the document pointed by the URL exist?
    if {[catch {set utoken [::http::geturl $urlStr -progress urllog_http_handler -blocksize 1024 -timeout 3000]} uerrmsg]} {
        urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)"
        urllog_log "HTTP request failed: $uerrmsg"
        return 0
    }

    # Take what we need from the transaction, then free the token at once.
    set ustatus [::http::status $utoken]
    set uerror [::http::error $utoken]
    set ucode [::http::ncode $utoken]
    set ucodestr [::http::code $utoken]
    set udata [::http::data $utoken]
    ::http::cleanup $utoken

    if {$ustatus == "timeout"} {
        urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout"
        urllog_log "HTTP request timed out ($urlStr)"
        return 0
    }

    if {$ustatus != "ok"} {
        urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($uerror)"
        urllog_log "Error in HTTP transaction: $uerror ($urlStr)"
        return 0
    }

    # Fixme! Handle redirects!
    if {!($ucode >= 200 && $ucode <= 309)} {
        urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($ucodestr)"
        urllog_log "$ucodestr - $urlStr"
        return 0
    }

    # Look for a charset declaration in a <meta> element; names of
    # three characters or less are ignored.
    set uconvert 0
    if {[regexp -nocase -- {<meta.*?content=".*?charset=([^"]*)"/>} $udata -> uencoding]} {
        if {[string length $uencoding] > 3} {
            set uconvert 1
        }
    }

    # Extract and tidy up the document title, if there is one.
    set urlTitle ""
    if {[regexp -nocase -- {<title>(.*?)</title>} $udata -> urlTitle]} {
        if {$uconvert != 0} {
            if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} {
                urllog_log "Error in charset conversion: $cerrmsg"
            }
        }
        set urlTitle [string trim [urllog_convert_ent $urlTitle] " "]
    }

    # Rasiatube hack: replace the wrapper page URL with the address of
    # the video it embeds.
    if {[string match "*/rasiatube/view*" $urlStr]} {
        set rasia 0
        if {[regexp -nocase -- {<link rel="video_src".*?file=(http://[^&]+)&} $udata -> ufixed]} {
            set urlStr [string map {/v/ /watch?v=} $ufixed]
            set rasia 1
        } elseif {[regexp -nocase -- {SWFObject."([^"]+)", *"flashvideo} $udata -> ufixed]} {
            regsub {http://www.dailymotion.com/swf/} $ufixed {http://www.dailymotion.com/video/} urlStr
            set rasia 1
        }

        if {$rasia != 0} {
            urllog_log "RasiaTube mangler: $urlStr"
            urllog_verb_msg $urlNick $urlChan "Korjataan haiseva rasiatube-linkki: $urlStr"
        }
    }

    urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle
    return 1
}
515 | |
516 | |
517 #------------------------------------------------------------------------- | |
# Handler for the "pubm" and "topc" bindings: scan a line of text for
# words that look like URLs and hand each of them to urllog_checkurl.
#   nick, uhost, hand, chan - originator of the text, as given by the bot
#   text                    - the text itself
# Always returns 0.
#
# The pattern is brace-quoted so that "\." reaches the regular
# expression engine as a literal dot. Inside double quotes Tcl strips
# the backslash, which made the dot match any character.
proc urllog_checkmsg {nick uhost hand chan text} {
    ### Check the nick
    if {$nick == "*"} {
        urllog_log "urllog_checkmsg: nick was wc, this should not happen."
        return 0
    }

    ### Do the URL checking
    foreach istr [split $text " "] {
        if {[regexp {(ftp|http|https)://|www\..+|ftp\..*} $istr]} {
            urllog_checkurl $istr $nick $uhost $chan
        }
    }

    return 0
}
534 | |
535 | |
536 #------------------------------------------------------------------------- | |
537 ### Parse arguments, find and show the results | |
# Parse a search request, query the database and report the results.
#   unick   - nick of the person searching
#   uhand   - bot handle of that person (used in the log message only)
#   uchan   - channel for public replies ("" for private searches)
#   utext   - search words separated by spaces
#   upublic - 1 when asked on a channel, 0 when asked in a private message
# Always returns 0.
#
# Each search word becomes one condition; all conditions must hold:
#   word   the URL contains "word"
#   -word  the URL does not contain "word"
#   %nick  the URL was said by "nick"
#   @...   accepted but currently ignored
#
# The number of results is taken from urllog_showmax_pub or
# urllog_showmax_priv (these settings used to be ignored in favour of
# hard-coded limits).
proc urllog_find {unick uhand uchan utext upublic} {
    global urllog_shorturl
    global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch

    if {$upublic == 0} {
        set ulimit $urllog_showmax_priv
    } else {
        set ulimit $urllog_showmax_pub
    }

    ### Parse the given command
    urllog_log "$unick/$uhand searched URL: $utext"

    set fpatlist {}
    foreach ftoken [split $utext " "] {
        # Consecutive spaces produce empty words; they carry no condition.
        if {$ftoken eq ""} {
            continue
        }
        set fprefix [string index $ftoken 0]
        set fpattern [string range $ftoken 1 end]

        if {$fprefix == "-"} {
            # A lone "-" would exclude every URL, so it is skipped.
            if {$fpattern ne ""} {
                lappend fpatlist "url NOT LIKE '%[urllog_escape $fpattern]%'"
            }
        } elseif {$fprefix == "%"} {
            lappend fpatlist "user='[urllog_escape $fpattern]'"
        } elseif {$fprefix == "@"} {
            # Not implemented: the word is ignored.
        } else {
            lappend fpatlist "url LIKE '%[urllog_escape $ftoken]%'"
        }
    }

    if {[llength $fpatlist] > 0} {
        set fquery "WHERE [join $fpatlist " AND "]"
    } else {
        set fquery ""
    }

    set iresults 0
    set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls $fquery ORDER BY utime DESC LIMIT $ulimit"
    urldb eval $usql {
        incr iresults
        set shortURL $uurl
        if {$urllog_shorturl != 0 && $uid != ""} {
            set shortURL "$shortURL [urllog_get_short $uid]"
        }
        urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[urllog_ctime $utime])"
    }

    if {$iresults == 0} {
        # If no URLs were found
        urllog_msg $upublic $unick $uchan $urlmsg_nomatch
    }

    return 0
}
592 | |
593 | |
594 #------------------------------------------------------------------------- | |
595 ### Command handlers bound to the URL search commands | |
# Handler for the public "!urlfind" command: runs the search and
# answers on the channel. Always returns 0.
proc urllog_pub_urlfind {unick uhost uhand uchan utext} {
    set upublic 1
    urllog_find $unick $uhand $uchan $utext $upublic
    return 0
}
600 | |
601 | |
# Handler for the private message "urlfind" command: runs the search
# with an empty channel name and answers to the nick. Always returns 0.
proc urllog_msg_urlfind {unick uhost uhand utext} {
    set upublic 0
    urllog_find $unick $uhand "" $utext $upublic
    return 0
}
0 | 606 |
607 # end of script |