Mercurial > hg > egg-tcls
annotate urllog.tcl @ 114:593874678e45
Clarify authorship by doing sed "s/ccr\/TNSP/Matti 'ccr' Hamalainen/g".
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Wed, 21 Sep 2011 07:47:47 +0300 |
parents | 077c7383f36f |
children | 5db02af76016 |
rev | line source |
---|---|
0 | 1 ########################################################################## |
2 # | |
114
593874678e45
Clarify authorship by doing sed "s/ccr\/TNSP/Matti 'ccr' Hamalainen/g".
Matti Hamalainen <ccr@tnsp.org>
parents:
113
diff
changeset
|
3 # URLLog v2.2.1 by Matti 'ccr' Hamalainen <ccr@tnsp.org> |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
4 # (C) Copyright 2000-2011 Tecnic Software productions (TNSP) |
0 | 5 # |
113
077c7383f36f
urllog: Add line about the script's license.
Matti Hamalainen <ccr@tnsp.org>
parents:
112
diff
changeset
|
6 # This script is freely distributable under GNU GPL (version 2) license. |
077c7383f36f
urllog: Add line about the script's license.
Matti Hamalainen <ccr@tnsp.org>
parents:
112
diff
changeset
|
7 # |
0 | 8 ########################################################################## |
9 # | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
10 # URL-logger script for EggDrop IRC robot, utilizing SQLite3 database |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
11 # This script requires SQLite TCL extension. Under Debian, you need: |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
12 # tcl8.5 libsqlite3-tcl (and eggdrop eggdrop-data, of course) |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
13 # |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
14 # NOTICE! If you are upgrading to URLLog v2.0+ from any 1.x version, you |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
15 # may want to run a conversion script against your URL-database file, |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
16 # if you wish to preserve the old data. |
0 | 17 # |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
18 # See convert_urllog_db.tcl for more information. |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
19 # |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
20 # If you are doing a fresh install, you will need to create the |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
21 # initial SQLite3 database with the required table schemas. You |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
22 # can do that by running: create_urllog_db.tcl |
0 | 23 # |
24 ########################################################################## | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
25 |
0 | 26 ### |
27 ### HTTP options | |
28 ### | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
29 # Set to 1 if you want to enable use of HTTP proxy. |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
30 # If you do, you MUST set the proxy settings below too. |
0 | 31 set http_proxy 0 |
32 | |
33 # Proxy host and port number (only used if enabled above) | |
34 set http_proxy_host "" | |
35 set http_proxy_port 8080 | |
36 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
37 # Enable _experimental_ TLS/SSL support. This may not work at all. |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
38 # If unsure, leave this option disabled (0). |
104
da337ca10e0a
urllog: Enable SSL/TLS support by default.
Matti Hamalainen <ccr@tnsp.org>
parents:
103
diff
changeset
|
39 set http_tls_support 1 |
0 | 40 |
89
77e05ce9e9b8
urllog: Add certdir option setting.
Matti Hamalainen <ccr@tnsp.org>
parents:
87
diff
changeset
|
41 set http_tls_cadir "/usr/share/ca-certificates/mozilla" |
77e05ce9e9b8
urllog: Add certdir option setting.
Matti Hamalainen <ccr@tnsp.org>
parents:
87
diff
changeset
|
42 |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
43 |
0 | 44 ### |
45 ### General options | |
46 ### | |
47 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
48 # Filename of the SQLite URL database file |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
49 set urllog_db_file "urllog.sqlite" |
0 | 50 |
51 | |
52 # 1 = Verbose: Say messages when URL is OK, bad, etc. | |
53 # 0 = Quiet : Be quiet (only speak if asked with !urlfind, etc) | |
54 set urllog_verbose 1 | |
55 | |
56 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
57 # 1 = Enable logging of various script actions into bot's log |
0 | 58 # 0 = Don't. |
59 set urllog_logmsg 1 | |
60 | |
61 | |
62 # 1 = Check URLs for validity and existence before adding. | |
63 # 0 = No checks. Add _anything_ that looks like an URL to the database. | |
64 set urllog_check 1 | |
65 | |
66 | |
67 ### | |
68 ### Search related settings | |
69 ### | |
70 | |
71 # 0 = No search-commands available | |
72 # 1 = Search enabled | |
73 set urllog_search 1 | |
74 | |
75 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
76 # Limit how many URLs should the "!urlfind" command show at most. |
0 | 77 set urllog_showmax_pub 3 |
78 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
79 # Same as above, but for private message search. |
0 | 80 set urllog_showmax_priv 6 |
81 | |
82 | |
83 ### | |
84 ### ShortURL-settings | |
85 ### | |
86 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
87 # 1 = Enable showing of ShortURLs |
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
88 # 0 = ShortURLs not shown in any bot actions |
0 | 89 set urllog_shorturl 1 |
90 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
91 # Max length of original URL to be shown, rest is chopped |
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
92 # off if the URL is longer than the specified amount. |
0 | 93 set urllog_shorturl_orig 30 |
94 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
95 # Web server URL that handles redirects of ShortURLs |
0 | 96 set urllog_shorturl_prefix "http://tnsp.org/u/" |
97 | |
98 | |
99 ### | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
100 ### Message texts (informal, errors, etc.) |
0 | 101 ### |
102 | |
103 # No such host was found | |
104 set urlmsg_nosuchhost "ei tommosta oo!" | |
105 | |
106 # Could not connect host (I/O errors etc) | |
107 set urlmsg_ioerror "kraak, virhe yhdynnässä." | |
108 | |
109 # HTTP timeout | |
110 set urlmsg_timeout "ei jaksa ootella" | |
111 | |
112 # No such document was found | |
113 set urlmsg_errorgettingdoc "siitosvirhe" | |
114 | |
115 # URL was already known (was in database) | |
116 set urlmsg_alreadyknown "wanha!" | |
117 #set urlmsg_alreadyknown "Empiiristen havaintojen perusteella ja tällä sovellutusalueella esiintyneisiin aikaisempiin kontekstuaalisiin ilmaisuihin viitaten uskallan todeta, että sovellukseen ilmoittamasi tietoverkko-osoite oli kronologisti ajatellen varsin postpresentuaalisesti sopimaton ja ennestään hyvin tunnettu." | |
118 | |
119 # No match was found when searched with !urlfind or other command | |
120 set urlmsg_nomatch "Ei osumia." | |
121 | |
122 | |
123 ### | |
124 ### Things that you usually don't need to touch ... | |
125 ### | |
126 | |
127 # What IRC "command" should we use to send messages: | |
128 # (Valid alternatives are "PRIVMSG" and "NOTICE") | |
129 set urllog_preferredmsg "PRIVMSG" | |
130 | |
131 # The valid known Top Level Domains (TLDs), but not the country code TLDs | |
132 # (Now includes the new IANA published TLDs) | |
90
a9a4456eb213
urllog: Add .xxx TLD to supported list.
Matti Hamalainen <ccr@tnsp.org>
parents:
89
diff
changeset
|
133 set urllog_tlds "org,com,net,mil,gov,biz,edu,coop,aero,info,museum,name,pro,int,xxx" |
0 | 134 |
135 | |
136 ########################################################################## | |
137 # No need to look below this line | |
138 ########################################################################## | |
139 set urllog_name "URLLog" | |
107 | 140 set urllog_version "2.2.1" |
0 | 141 |
142 set urllog_tlds [split $urllog_tlds ","] | |
143 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] | |
144 | |
102
5425dc418505
urllog: Entity data is now in UTF-8, but TCL source files are interpreted with current system locale, which may not be UTF-8. We must therefore "convert" the entity mapping string to UTF-8 to be certain of TCL's interpretation of its encoding.
Matti Hamalainen <ccr@tnsp.org>
parents:
101
diff
changeset
|
145 |
111
e09c791b2a48
urllog: Improve entity handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
110
diff
changeset
|
146 set urllog_ent_str "-|-|'|'|—|-|‏||—|-|–|--|‪||‬||" |
e09c791b2a48
urllog: Improve entity handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
110
diff
changeset
|
147 append urllog_ent_str "‎||å|Ã¥|Å|Ã…|é|é|:|:| | |" |
e09c791b2a48
urllog: Improve entity handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
110
diff
changeset
|
148 append urllog_ent_str "”|\"|“|\"|«|<<|»|>>|"|\"|" |
e09c791b2a48
urllog: Improve entity handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
110
diff
changeset
|
149 append urllog_ent_str "ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>" |
102
5425dc418505
urllog: Entity data is now in UTF-8, but TCL source files are interpreted with current system locale, which may not be UTF-8. We must therefore "convert" the entity mapping string to UTF-8 to be certain of TCL's interpretation of its encoding.
Matti Hamalainen <ccr@tnsp.org>
parents:
101
diff
changeset
|
150 set urllog_html_ent [split [encoding convertfrom "utf-8" $urllog_ent_str] "|"] |
0 | 151 |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
152 ### Require packages |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
153 package require sqlite3 |
0 | 154 package require http |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
155 |
0 | 156 ### Binding initializations |
157 if {$urllog_search != 0} { | |
28 | 158 bind pub - !urlfind urllog_pub_urlfind |
159 bind msg - urlfind urllog_msg_urlfind | |
0 | 160 } |
161 | |
162 bind pubm - *.* urllog_checkmsg | |
163 bind topc - *.* urllog_checkmsg | |
164 | |
165 | |
166 ### Initialization messages | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
167 set urllog_message "$urllog_name v$urllog_version (C) 2000-2011 ccr/TNSP" |
0 | 168 putlog "$urllog_message" |
169 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
170 ### HTTP module initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
171 ::http::config -useragent "$urllog_name/$urllog_version" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
172 if {$http_proxy != 0} { |
28 | 173 ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
174 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
175 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
176 if {$http_tls_support != 0} { |
28 | 177 package require tls |
89
77e05ce9e9b8
urllog: Add certdir option setting.
Matti Hamalainen <ccr@tnsp.org>
parents:
87
diff
changeset
|
178 ::http::register https 443 [list ::tls::socket -request 1 -require 1 -cadir $http_tls_cadir] |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
179 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
180 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
181 ### SQLite database initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
182 if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} { |
28 | 183 putlog " Could not open SQLite3 database '$urllog_db_file': $uerrmsg" |
184 exit 2 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
185 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
186 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
187 |
0 | 188 if {$http_proxy != 0} { |
28 | 189 putlog " (Using proxy $http_proxy_host:$http_proxy_port)" |
0 | 190 } |
191 | |
192 if {$urllog_check != 0} { | |
28 | 193 putlog " (Additional URL validity checks enabled)" |
0 | 194 } |
195 | |
196 if {$urllog_verbose != 0} { | |
28 | 197 putlog " (Verbose mode enabled)" |
0 | 198 } |
199 | |
200 if {$urllog_search != 0} { | |
28 | 201 putlog " (Search commands enabled)" |
0 | 202 } |
203 | |
204 #------------------------------------------------------------------------- | |
205 ### Utility functions | |
# Write a message to the bot's log, prefixed with the script name.
#
# arg - text to log
#
# Nothing is logged when the urllog_logmsg setting is 0.
proc urllog_log {arg} {
    if {$::urllog_logmsg == 0} {
        return
    }
    putlog "${::urllog_name}: $arg"
}
213 | |
214 | |
# Format a UNIX timestamp as "dd.mm.yyyy HH:MM".
#
# utime - seconds since the epoch; an empty string or "*" is treated as 0
#
# Returns the formatted time string as produced by [clock format].
proc urllog_ctime { utime } {
    switch -exact -- $utime {
        "" -
        "*" { set utime 0 }
    }
    return [clock format $utime -format "%d.%m.%Y %H:%M"]
}
223 | |
224 | |
# Check whether a string consists solely of the ASCII digits 0-9.
#
# uarg - string to test
#
# Returns 1 when every character is a digit, 0 otherwise.
# Note that an empty string has no offending characters and yields 1.
proc urllog_isnumber {uarg} {
    return [regexp {^[0-9]*$} $uarg]
}
233 | |
234 | |
# Send a message to IRC using the configured command (urllog_preferredmsg).
#
# apublic - 1 to address the channel, anything else to address the nick
# anick   - nick to message when not public
# achan   - channel to message when public
# amsg    - message text
proc urllog_msg {apublic anick achan amsg} {
    global urllog_preferredmsg

    # Pick the destination first, then emit a single server line.
    set target $anick
    if {$apublic == 1} {
        set target $achan
    }
    putserv "$urllog_preferredmsg $target :$amsg"
}
244 | |
245 | |
# Say something on the channel, but only in verbose mode.
#
# anick - nick that triggered the message (passed through to urllog_msg)
# achan - channel to speak on
# amsg  - message text
#
# Silent when the urllog_verbose setting is 0.
proc urllog_verb_msg {anick achan amsg} {
    if {$::urllog_verbose == 0} {
        return
    }
    urllog_msg 1 $anick $achan $amsg
}
253 | |
254 | |
# Normalise whitespace in a piece of text and replace HTML entities using
# the global mapping list urllog_html_ent (a flat "from to from to ..." list
# suitable for [string map]).
#
# udata - raw text
#
# Returns the cleaned-up text.
proc urllog_convert_ent {udata} {
    global urllog_html_ent

    # Line breaks become spaces.
    regsub -all "\r|\n" $udata " " utmp

    # Collapse runs of spaces into one. The pattern must require at least
    # one space: a pattern that can match the empty string would make
    # [regsub -all] insert a space at every character position.
    regsub -all { +} $utmp " " utmp

    # Tabs are dropped entirely.
    regsub -all "\t" $utmp "" utmp

    # Exact-case mapping first so that case-sensitive entities keep their
    # intended case; the case-insensitive pass then catches the remainder.
    return [string map -nocase $urllog_html_ent [string map $urllog_html_ent $utmp]]
}
262 | |
263 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
# Escape a string for use inside a single-quoted SQL literal by doubling
# every single quote.
#
# str - text to escape
#
# Returns the escaped text.
proc urllog_escape { str } {
    set pieces [split $str "'"]
    return [join $pieces "''"]
}
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
267 |
0 | 268 #------------------------------------------------------------------------- |
# Build the bracketed ShortURL string for a numeric identifier.
#
# utime - non-negative integer to encode (the parameter keeps its historical
#         name; callers in this file pass the database row id)
#
# The number is written as three base-62 "digits" taken from ustr and
# appended to urllog_shorturl_prefix. Values of 62^3 or more cannot be
# represented in three digits; for those the leading digit is empty because
# [string index] is out of range.
#
# Returns "[ <prefix><3 chars> ]".
proc urllog_get_short {utime} {
    global urllog_shorturl_prefix

    # NOTE(review): "N" and "M" are swapped in this alphabet. It is kept
    # as-is because changing it would alter every short link already handed
    # out; presumably the redirect handler uses the same table - confirm.
    set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
    set ulen [string length $ustr]

    # Braced expressions avoid double substitution of the operands.
    set u1 [expr {$utime / ($ulen * $ulen)}]
    set utmp [expr {$utime % ($ulen * $ulen)}]
    set u2 [expr {$utmp / $ulen}]
    set u3 [expr {$utmp % $ulen}]

    return "\[ $urllog_shorturl_prefix[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3] \]"
}
282 | |
283 | |
284 #------------------------------------------------------------------------- | |
# Shorten a URL for display.
#
# url - the URL text
#
# URLs longer than urllog_shorturl_orig characters are cut to exactly that
# many characters and get "..." appended; shorter ones are returned as-is.
proc urllog_chop_url {url} {
    global urllog_shorturl_orig

    if {[string length $url] > $urllog_shorturl_orig} {
        # [string range] takes an inclusive end index, so the last kept
        # character is at position max-1.
        return "[string range $url 0 [expr {$urllog_shorturl_orig - 1}]]..."
    } else {
        return $url
    }
}
294 | |
295 #------------------------------------------------------------------------- | |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
# Check whether a URL is already stored in the database and, if it is,
# announce that fact (log entry plus a verbose-mode channel message).
#
# urlStr  - URL to look up
# urlNick - nick that said the URL
# urlHost - host of that nick (not used here)
# urlChan - channel the URL was said on
#
# Returns 0 when the URL is ALREADY KNOWN and 1 when it is new. The sense
# is inverted relative to the proc name; it is kept because callers depend
# on it.
proc urllog_exists {urlStr urlNick urlHost urlChan} {
    global urlmsg_alreadyknown urllog_shorturl

    # $urlStr inside the braces is not substituted by Tcl; SQLite binds the
    # variable's value as a parameter, so no manual quoting is needed.
    set usql {SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE url=$urlStr}
    urldb eval $usql {
        urllog_log "URL said by $urlNick ($urlStr) already known"
        if {$urllog_shorturl != 0} {
            set qstr "[urllog_get_short $uid] "
        } else {
            set qstr ""
        }
        append qstr "($uuser/$uchan@[urllog_ctime $utime])"
        if {[string length $utitle] > 0} {
            set qstr "$urlmsg_alreadyknown - '$utitle' $qstr"
        } else {
            set qstr "$urlmsg_alreadyknown $qstr"
        }
        urllog_verb_msg $urlNick $urlChan $qstr
        # The first matching row is enough.
        return 0
    }
    return 1
}
0 | 318 |
18
1e2232135354
More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents:
13
diff
changeset
|
319 |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
320 #------------------------------------------------------------------------- |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
# Store a URL in the database and confirm it on the channel (verbose mode).
#
# urlStr   - URL to store
# urlNick  - nick that said the URL
# urlHost  - host of that nick
# urlChan  - channel the URL was said on
# urlTitle - page title; an empty string is stored as NULL
#
# Returns 1 on success, 0 when the INSERT failed (the error is logged).
proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} {
    global urllog_shorturl

    # All values are bound by SQLite from the local variables named in the
    # statement, so nothing has to be quoted by hand. NULLIF turns an empty
    # title into NULL; CAST keeps the timestamp numeric regardless of how
    # the Tcl value is represented internally.
    set utime [unixtime]
    set usql {INSERT INTO urls (utime,url,user,host,chan,title) VALUES (CAST($utime AS INTEGER), $urlStr, $urlNick, $urlHost, $urlChan, NULLIF($urlTitle, ''))}
    if {[catch {urldb eval $usql} uerrmsg]} {
        urllog_log "$uerrmsg on SQL:\n$usql"
        return 0
    }
    set uid [urldb last_insert_rowid]
    urllog_log "Added URL ($urlNick@$urlChan): $urlStr"

    ### Let's say something, to confirm that everything went well.
    if {$urllog_shorturl != 0} {
        set qstr "[urllog_get_short $uid] "
    } else {
        set qstr ""
    }
    if {[string length $urlTitle] > 0} {
        urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr"
    } else {
        urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr"
    }

    return 1
}
352 | |
353 | |
354 #------------------------------------------------------------------------- | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
# Transfer-progress handler that cuts a download short.
#
# utoken - name of the global state array of the transfer
# utotal - expected total size (unused)
# ucurr  - number of bytes received so far
#
# NOTE(review): the argument list matches what the http package passes to a
# -progress callback; the registration itself is outside this block - confirm.
proc urllog_http_handler {utoken utotal ucurr} {
    upvar #0 $utoken state

    # Stop fetching data after 64000 bytes, this should be enough to
    # contain the head section of a HTML page. Marking the transfer's
    # status as "ok" is how the fetch is told that it is done.
    if {$ucurr > 64000} {
        set state(status) "ok"
    }
}
364 | |
84
fa1e95c2a0bc
urllog: Bump version to 2.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
83
diff
changeset
|
365 |
0 | 366 #------------------------------------------------------------------------- |
367 proc urllog_checkurl {urlStr urlNick urlHost urlChan} { | |
28 | 368 global urllog_tlds urllog_check urlmsg_nosuchhost urlmsg_ioerror |
369 global urlmsg_timeout urlmsg_errorgettingdoc urllog_httprep | |
370 global urllog_shorturl_prefix urllog_shorturl urllog_encoding | |
95
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
371 global http_tls_support |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
372 |
96
e5a6c27be365
urllog: Comments and cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents:
95
diff
changeset
|
373 ### Try to guess the URL protocol component (if it is missing) |
28 | 374 set u_checktld 1 |
375 if {[string match "*www.*" $urlStr] && ![string match "http://*" $urlStr] && ![string match "https://*" $urlStr]} { | |
376 set urlStr "http://$urlStr" | |
377 } elseif {[string match "*ftp.*" $urlStr] && ![string match "ftp://*" $urlStr]} { | |
378 set urlStr "ftp://$urlStr" | |
379 } | |
0 | 380 |
95
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
381 ### Handle URLs that have an IPv4-address |
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
382 if {[regexp "(\[a-z\]+)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr u_match u_proto ni1 ni2 ni3 ni4]} { |
28 | 383 # Check if the IP is on local network |
92
f6f4595856ff
urllog: Cosmetics. Remove useless parenthesis.
Matti Hamalainen <ccr@tnsp.org>
parents:
91
diff
changeset
|
384 if {$ni1 == 127 || $ni1 == 10 || ($ni1 == 192 && $ni2 == 168) || $ni1 == 0} { |
28 | 385 urllog_log "URL pointing to local or invalid network, ignored ($urlStr)." |
386 return 0 | |
387 } | |
388 # Skip TLD check for URLs with IP address | |
389 set u_checktld 0 | |
390 } | |
0 | 391 |
96
e5a6c27be365
urllog: Comments and cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents:
95
diff
changeset
|
392 ### Check now if we have an ShortURL here ... |
28 | 393 if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} { |
98
fbbe7ee40e2f
urllog: Improve one informational / error message.
Matti Hamalainen <ccr@tnsp.org>
parents:
97
diff
changeset
|
394 urllog_log "Ignoring ShortURL from $urlNick: $urlStr" |
28 | 395 return 0 |
396 } | |
0 | 397 |
95
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
398 ### Get URL protocol component |
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
399 set u_proto "" |
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
400 if {[regexp "(\[a-z\]+)://" $urlStr u_match u_proto]} { |
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
401 } |
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
402 |
28 | 403 ### Check the PORT (if the ":" is there) |
404 set u_record [split $urlStr "/"] | |
405 set u_hostname [lindex $u_record 2] | |
406 set u_port [lindex [split $u_hostname ":"] end] | |
0 | 407 |
28 | 408 if {![urllog_isnumber $u_port] && $u_port != "" && $u_port != $u_hostname} { |
409 urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $u_port" | |
410 return 0 | |
411 } | |
0 | 412 |
28 | 413 # Default to port 80 (HTTP) |
414 if {![urllog_isnumber $u_port]} { | |
415 set u_port 80 | |
416 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
417 |
28 | 418 ### Is it a http or ftp url? (FIX ME!) |
97
366e68ad94df
urllog: Use u_proto variable to check for if the protocol is supported instead of doing useless additional string checking.
Matti Hamalainen <ccr@tnsp.org>
parents:
96
diff
changeset
|
419 if {$u_proto != "http" && $u_proto != "https" && $u_proto != "ftp"} { |
366e68ad94df
urllog: Use u_proto variable to check for if the protocol is supported instead of doing useless additional string checking.
Matti Hamalainen <ccr@tnsp.org>
parents:
96
diff
changeset
|
420 urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED protocol class ($u_proto)." |
28 | 421 return 0 |
422 } | |
0 | 423 |
28 | 424 ### Check the Top Level Domain (TLD) validity |
425 if {$u_checktld != 0} { | |
426 set u_sane [lindex [split $u_hostname "."] end] | |
427 set u_tld [lindex [split $u_sane ":"] 0] | |
428 set u_found 0 | |
0 | 429 |
28 | 430 if {[string length $u_tld] == 2} { |
431 # Assume all 2-letter domains to be valid :) | |
432 set u_found 1 | |
433 } else { | |
434 # Check our list of known TLDs | |
435 foreach itld $urllog_tlds { | |
436 if {[string match $itld $u_tld]} { | |
437 set u_found 1 | |
438 } | |
439 } | |
440 } | |
0 | 441 |
28 | 442 if {$u_found == 0} { |
443 urllog_log "Broken URL from $urlNick: ($urlStr) illegal TLD: $u_tld." | |
444 return 0 | |
445 } | |
446 } | |
0 | 447 |
28 | 448 set urlStr [string map $urllog_httprep $urlStr] |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
449 |
91
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
450 ### Does the URL already exist? |
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
451 if {![urllog_exists $urlStr $urlNick $urlHost $urlChan]} { |
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
452 return 1 |
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
453 } |
0 | 454 |
28 | 455 ### Do we perform additional optional checks? |
95
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
456 if {$urllog_check == 0 || ($http_tls_support == 0 && $u_proto == "https") || $u_proto != "http"} { |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
457 # No optional checks, just add the URL, if it does not exist already |
91
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
458 urllog_addurl $urlStr $urlNick $urlHost $urlChan "" |
28 | 459 return 1 |
460 } | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
461 |
28 | 462 ### Does the document pointed by the URL exist? |
463 if {[catch {set utoken [::http::geturl $urlStr -progress urllog_http_handler -blocksize 1024 -timeout 3000]} uerrmsg]} { | |
464 urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)" | |
465 urllog_log "HTTP request failed: $uerrmsg" | |
466 return 0 | |
467 } | |
0 | 468 |
28 | 469 if {[::http::status $utoken] == "timeout"} { |
470 urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout" | |
471 urllog_log "HTTP request timed out ($urlStr)" | |
472 return 0 | |
473 } | |
0 | 474 |
28 | 475 if {[::http::status $utoken] != "ok"} { |
476 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::error $utoken])" | |
477 urllog_log "Error in HTTP transaction: [::http::error $utoken] ($urlStr)" | |
478 return 0 | |
479 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
480 |
28 | 481 # Fixme! Handle redirects! |
482 set ucode [::http::ncode $utoken] | |
483 if {$ucode >= 200 && $ucode <= 309} { | |
484 set udata [::http::data $utoken] | |
485 set uconvert 0 | |
101
372b63af72b5
urllog: Improve page character set encoding detection/guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
99
diff
changeset
|
486 if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/>" $udata umatches uencoding]} { |
28 | 487 if {[string length $uencoding] > 3} { |
101
372b63af72b5
urllog: Improve page character set encoding detection/guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
99
diff
changeset
|
488 set uencoding [string tolower $uencoding] |
372b63af72b5
urllog: Improve page character set encoding detection/guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
99
diff
changeset
|
489 regsub -- "iso-" $uencoding "iso" uencoding |
28 | 490 set uconvert 1 |
491 } | |
492 } | |
86
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
493 if {$uconvert == 0} { |
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
494 set uencoding "iso8859-1" |
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
495 } |
0 | 496 |
28 | 497 set umatches [regexp -nocase -inline -- "<title>(.\*\?)</title>" $udata] |
498 if {[llength $umatches] > 0} { | |
499 set urlTitle [lindex $umatches 1] | |
86
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
500 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} { |
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
501 urllog_log "Error in charset conversion: $cerrmsg" |
28 | 502 } |
503 set urlTitle [urllog_convert_ent $urlTitle] | |
504 regsub -all "(^ *| *$)" $urlTitle "" urlTitle | |
505 } else { | |
506 set urlTitle "" | |
507 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
508 |
28 | 509 # Rasiatube hack |
510 if {[string match "*/rasiatube/view*" $urlStr]} { | |
511 set rasia 0 | |
512 set umatches [regexp -nocase -inline -- "<link rel=\"video_src\"\.\*\?file=(http://\[^&\]+)&" $udata] | |
513 if {[llength $umatches] > 0} { | |
514 set urlStr [lindex $umatches 1] | |
515 regsub -all "\/v\/" $urlStr "\/watch\?v=" urlStr | |
516 set rasia 1 | |
517 } else { | |
518 set umatches [regexp -nocase -inline -- "SWFObject.\"(\[^\"\]+)\", *\"flashvideo" $udata] | |
519 if {[llength $umatches] > 0} { | |
520 set urlStr [lindex $umatches 1] | |
521 regsub "http:\/\/www.dailymotion.com\/swf\/" $urlStr "http:\/\/www.dailymotion.com\/video\/" urlStr | |
522 set rasia 1 | |
523 } | |
524 } | |
525 | |
526 if {$rasia != 0} { | |
527 urllog_log "RasiaTube mangler: $urlStr" | |
528 urllog_verb_msg $urlNick $urlChan "Korjataan haiseva rasiatube-linkki: $urlStr" | |
529 } | |
530 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
531 |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
532 # Check if the URL already exists, just in case we had some redirects |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
533 if {[urllog_exists $urlStr $urlNick $urlHost $urlChan]} { |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
534 urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
535 } |
28 | 536 return 1 |
537 } else { | |
538 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::code $utoken])" | |
539 urllog_log "[::http::code $utoken] - $urlStr" | |
540 } | |
0 | 541 |
28 | 542 ::http::cleanup $utoken |
0 | 543 } |
544 | |
545 | |
546 #------------------------------------------------------------------------- | |
# Scan one line of text for words that look like URLs and pass each
# candidate on to urllog_checkurl.
#
# Arguments:
#   unick - nick the text came from; "*" is rejected and logged
#   uhost - passed through to urllog_checkurl
#   uhand - accepted but not used by this proc
#   uchan - passed through to urllog_checkurl
#   utext - the text to scan; it is split on single spaces
#
# Returns:
#   Always 0.
proc urllog_checkmsg {unick uhost uhand uchan utext} {
  ### Check the nick
  if {$unick == "*"} {
    urllog_log "urllog_checkmsg: nick was wc, this should not happen."
    return 0
  }

  ### Do the URL checking
  # The pattern is brace-quoted on purpose: inside double quotes Tcl would
  # reduce "\." to a bare "." before the regexp engine sees it, so the
  # "www." and "ftp." alternatives would match any character after the
  # prefix instead of a literal dot.
  foreach str [split $utext " "] {
    if {[regexp {(ftp|http|https)://|www\..+|ftp\..*} $str]} {
      urllog_checkurl $str $unick $uhost $uchan
    }
  }

  return 0
}
563 | |
564 | |
565 #------------------------------------------------------------------------- | |
566 ### Parse arguments, find and show the results | |
# Search the URL database and report the newest matches to the requester.
#
# Arguments:
#   unick   - nick of the requester; passed to urllog_msg
#   uhand   - handle of the requester; only used in the log line
#   uchan   - channel passed to urllog_msg
#   utext   - search expression, split on single spaces into tokens
#   upublic - 0 selects the private result limit, anything else the
#             public one; also passed to urllog_msg
#
# Token syntax (first character of each token):
#   -text   URL must NOT contain text
#   %name   user must match name (SQL LIKE)
#   @...    accepted but currently ignored
#   +text   URL must contain text
#   text    URL must contain text
# All clauses are combined with AND.
#
# Returns:
#   Always 0.
proc urllog_find {unick uhand uchan utext upublic} {
  global urllog_shorturl urldb
  global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch

  # Result limit: use the configured maximum when it is set to a positive
  # integer, otherwise fall back to the built-in defaults (5 private,
  # 3 public). The value is interpolated into the SQL text below, so it
  # is only accepted after the strict integer check.
  if {$upublic == 0} {
    set ulimit 5
    set limitvar urllog_showmax_priv
  } else {
    set ulimit 3
    set limitvar urllog_showmax_pub
  }
  if {[info exists $limitvar]
      && [string is integer -strict [set $limitvar]]
      && [set $limitvar] > 0} {
    set ulimit [set $limitvar]
  }

  ### Parse the given command
  urllog_log "$unick/$uhand searched URL: $utext"

  set fpatlist {}
  foreach ftoken [split $utext " "] {
    # Repeated spaces yield empty tokens; they would only add a
    # match-everything "url LIKE '%%'" clause, so skip them.
    if {$ftoken eq ""} {
      continue
    }

    set fprefix [string index $ftoken 0]
    set fpattern [string range $ftoken 1 end]

    switch -exact -- $fprefix {
      "-" {
        lappend fpatlist "url NOT LIKE '%[urllog_escape $fpattern]%'"
      }
      "%" {
        lappend fpatlist "user LIKE '[urllog_escape $fpattern]'"
      }
      "@" {
        # Recognized prefix with no behavior attached; adds no clause.
      }
      "+" {
        lappend fpatlist "url LIKE '%[urllog_escape $fpattern]%'"
      }
      default {
        lappend fpatlist "url LIKE '%[urllog_escape $ftoken]%'"
      }
    }
  }

  if {[llength $fpatlist] > 0} {
    set fquery "WHERE [join $fpatlist " AND "]"
  } else {
    set fquery ""
  }

  # Run the query; the eval body executes once per row with the column
  # aliases (uid, utime, uurl, uuser, uhost) set as local variables.
  set iresults 0
  set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls $fquery ORDER BY utime DESC LIMIT $ulimit"
  urldb eval $usql {
    incr iresults
    set shortURL $uurl
    if {$urllog_shorturl != 0 && $uid != ""} {
      set shortURL "$shortURL [urllog_get_short $uid]"
    }
    urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[urllog_ctime $utime])"
  }

  if {$iresults == 0} {
    # If no URLs were found
    urllog_msg $upublic $unick $uchan $urlmsg_nomatch
  }

  return 0
}
623 | |
624 | |
625 #------------------------------------------------------------------------- | |
626 ### Bound handler functions for the URL find commands | |
# Public entry point for URL searches: forwards the request to
# urllog_find with the public flag set to 1. The uhost argument is
# accepted but not used. Always returns 0.
proc urllog_pub_urlfind {unick uhost uhand uchan utext} {
  set isPublic 1
  urllog_find $unick $uhand $uchan $utext $isPublic
  return 0
}
631 | |
632 | |
# Private-message entry point for URL searches: forwards the request to
# urllog_find with an empty channel and the public flag set to 0. The
# uhost argument is accepted but not used. Always returns 0.
proc urllog_msg_urlfind {unick uhost uhand utext} {
  set noChannel ""
  set isPublic 0
  urllog_find $unick $uhand $noChannel $utext $isPublic
  return 0
}
0 | 637 |
638 # end of script |