Mercurial > hg > egg-tcls
annotate urllog.tcl @ 111:e09c791b2a48
urllog: Improve entity handling.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 19 Sep 2011 21:20:08 +0300 |
parents | 4aa1e1d545ed |
children | fae3dd7a8b20 |
rev | line source |
---|---|
0 | 1 ########################################################################## |
2 # | |
107 | 3 # URLLog v2.2.1 by ccr/TNSP <ccr@tnsp.org> |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
4 # (C) Copyright 2000-2011 Tecnic Software productions (TNSP) |
0 | 5 # |
6 ########################################################################## | |
7 # | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
8 # URL-logger script for EggDrop IRC robot, utilizing SQLite3 database |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
9 # This script requires SQLite TCL extension. Under Debian, you need: |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
10 # tcl8.5 libsqlite3-tcl (and eggdrop eggdrop-data, of course) |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
11 # |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
12 # NOTICE! If you are upgrading to URLLog v2.0+ from any 1.x version, you |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
13 # may want to run a conversion script against your URL-database file, |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
14 # if you wish to preserve the old data. |
0 | 15 # |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
16 # See convert_urllog_db.tcl for more information. |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
17 # |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
18 # If you are doing a fresh install, you will need to create the |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
19 # initial SQLite3 database with the required table schemas. You |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
20 # can do that by running: create_urllog_db.tcl |
0 | 21 # |
22 ########################################################################## | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
23 |
0 | 24 ### |
25 ### HTTP options | |
26 ### | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
27 # Set to 1 if you want to enable use of HTTP proxy. |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
28 # If you do, you MUST set the proxy settings below too. |
0 | 29 set http_proxy 0 |
30 | |
31 # Proxy host and port number (only used if enabled above) | |
32 set http_proxy_host "" | |
33 set http_proxy_port 8080 | |
34 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
35 # Enable _experimental_ TLS/SSL support. This may not work at all. |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
36 # If unsure, leave this option disabled (0). |
104
da337ca10e0a
urllog: Enable SSL/TLS support by default.
Matti Hamalainen <ccr@tnsp.org>
parents:
103
diff
changeset
|
37 set http_tls_support 1 |
0 | 38 |
89
77e05ce9e9b8
urllog: Add certdir option setting.
Matti Hamalainen <ccr@tnsp.org>
parents:
87
diff
changeset
|
39 set http_tls_cadir "/usr/share/ca-certificates/mozilla" |
77e05ce9e9b8
urllog: Add certdir option setting.
Matti Hamalainen <ccr@tnsp.org>
parents:
87
diff
changeset
|
40 |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
41 |
0 | 42 ### |
43 ### General options | |
44 ### | |
45 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
46 # Filename of the SQLite URL database file |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
47 set urllog_db_file "urllog.sqlite" |
0 | 48 |
49 | |
50 # 1 = Verbose: Say messages when URL is OK, bad, etc. | |
51 # 0 = Quiet : Be quiet (only speak if asked with !urlfind, etc) | |
52 set urllog_verbose 1 | |
53 | |
54 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
55 # 1 = Enable logging of various script actions into bot's log |
0 | 56 # 0 = Don't. |
57 set urllog_logmsg 1 | |
58 | |
59 | |
60 # 1 = Check URLs for validity and existence before adding. | |
61 # 0 = No checks. Add _anything_ that looks like a URL to the database. | |
62 set urllog_check 1 | |
63 | |
64 | |
65 ### | |
66 ### Search related settings | |
67 ### | |
68 | |
69 # 0 = No search-commands available | |
70 # 1 = Search enabled | |
71 set urllog_search 1 | |
72 | |
73 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
74 # Maximum number of URLs the "!urlfind" command will show. |
0 | 75 set urllog_showmax_pub 3 |
76 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
77 # Same as above, but for private message search. |
0 | 78 set urllog_showmax_priv 6 |
79 | |
80 | |
81 ### | |
82 ### ShortURL-settings | |
83 ### | |
84 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
85 # 1 = Enable showing of ShortURLs |
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
86 # 0 = ShortURLs not shown in any bot actions |
0 | 87 set urllog_shorturl 1 |
88 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
89 # Max length of original URL to be shown, rest is chopped |
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
90 # off if the URL is longer than the specified amount. |
0 | 91 set urllog_shorturl_orig 30 |
92 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
93 # Web server URL that handles redirects of ShortURLs |
0 | 94 set urllog_shorturl_prefix "http://tnsp.org/u/" |
95 | |
96 | |
97 ### | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
98 ### Message texts (informational, errors, etc.) |
0 | 99 ### |
100 | |
101 # No such host was found | |
102 set urlmsg_nosuchhost "ei tommosta oo!" | |
103 | |
104 # Could not connect host (I/O errors etc) | |
105 set urlmsg_ioerror "kraak, virhe yhdynnässä." | |
106 | |
107 # HTTP timeout | |
108 set urlmsg_timeout "ei jaksa ootella" | |
109 | |
110 # No such document was found | |
111 set urlmsg_errorgettingdoc "siitosvirhe" | |
112 | |
113 # URL was already known (was in database) | |
114 set urlmsg_alreadyknown "wanha!" | |
115 #set urlmsg_alreadyknown "Empiiristen havaintojen perusteella ja tällä sovellutusalueella esiintyneisiin aikaisempiin kontekstuaalisiin ilmaisuihin viitaten uskallan todeta, että sovellukseen ilmoittamasi tietoverkko-osoite oli kronologisti ajatellen varsin postpresentuaalisesti sopimaton ja ennestään hyvin tunnettu." | |
116 | |
117 # No match was found when searched with !urlfind or other command | |
118 set urlmsg_nomatch "Ei osumia." | |
119 | |
120 | |
121 ### | |
122 ### Things that you usually don't need to touch ... | |
123 ### | |
124 | |
125 # What IRC "command" should we use to send messages: | |
126 # (Valid alternatives are "PRIVMSG" and "NOTICE") | |
127 set urllog_preferredmsg "PRIVMSG" | |
128 | |
129 # The valid known Top Level Domains (TLDs), but not the country code TLDs | |
130 # (Now includes the new IANA published TLDs) | |
90
a9a4456eb213
urllog: Add .xxx TLD to supported list.
Matti Hamalainen <ccr@tnsp.org>
parents:
89
diff
changeset
|
131 set urllog_tlds "org,com,net,mil,gov,biz,edu,coop,aero,info,museum,name,pro,int,xxx" |
0 | 132 |
133 | |
134 ########################################################################## | |
135 # No need to look below this line | |
136 ########################################################################## | |
137 set urllog_name "URLLog" | |
107 | 138 set urllog_version "2.2.1" |
0 | 139 |
140 set urllog_tlds [split $urllog_tlds ","] | |
141 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] | |
142 | |
102
5425dc418505
urllog: Entity data is now in UTF-8, but TCL source files are interpreted with current system locale, which may not be UTF-8. We must therefore "convert" the entity mapping string to UTF-8 to be certain of TCL's interpretation of its encoding.
Matti Hamalainen <ccr@tnsp.org>
parents:
101
diff
changeset
|
143 |
111
e09c791b2a48
urllog: Improve entity handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
110
diff
changeset
|
144 set urllog_ent_str "-|-|'|'|—|-|‏||—|-|–|--|‪||‬||" |
e09c791b2a48
urllog: Improve entity handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
110
diff
changeset
|
145 append urllog_ent_str "‎||å|Ã¥|Å|Ã…|é|é|:|:| | |" |
e09c791b2a48
urllog: Improve entity handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
110
diff
changeset
|
146 append urllog_ent_str "”|\"|“|\"|«|<<|»|>>|"|\"|" |
e09c791b2a48
urllog: Improve entity handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
110
diff
changeset
|
147 append urllog_ent_str "ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>" |
102
5425dc418505
urllog: Entity data is now in UTF-8, but TCL source files are interpreted with current system locale, which may not be UTF-8. We must therefore "convert" the entity mapping string to UTF-8 to be certain of TCL's interpretation of its encoding.
Matti Hamalainen <ccr@tnsp.org>
parents:
101
diff
changeset
|
148 set urllog_html_ent [split [encoding convertfrom "utf-8" $urllog_ent_str] "|"] |
0 | 149 |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
150 ### Require packages |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
151 package require sqlite3 |
0 | 152 package require http |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
153 |
0 | 154 ### Binding initializations |
155 if {$urllog_search != 0} { | |
28 | 156 bind pub - !urlfind urllog_pub_urlfind |
157 bind msg - urlfind urllog_msg_urlfind | |
0 | 158 } |
159 | |
160 bind pubm - *.* urllog_checkmsg | |
161 bind topc - *.* urllog_checkmsg | |
162 | |
163 | |
164 ### Initialization messages | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
165 set urllog_message "$urllog_name v$urllog_version (C) 2000-2011 ccr/TNSP" |
0 | 166 putlog "$urllog_message" |
167 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
168 ### HTTP module initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
169 ::http::config -useragent "$urllog_name/$urllog_version" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
170 if {$http_proxy != 0} { |
28 | 171 ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
172 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
173 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
174 if {$http_tls_support != 0} { |
28 | 175 package require tls |
89
77e05ce9e9b8
urllog: Add certdir option setting.
Matti Hamalainen <ccr@tnsp.org>
parents:
87
diff
changeset
|
176 ::http::register https 443 [list ::tls::socket -request 1 -require 1 -cadir $http_tls_cadir] |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
177 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
178 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
179 ### SQLite database initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
180 if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} { |
28 | 181 putlog " Could not open SQLite3 database '$urllog_db_file': $uerrmsg" |
182 exit 2 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
183 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
184 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
185 |
0 | 186 if {$http_proxy != 0} { |
28 | 187 putlog " (Using proxy $http_proxy_host:$http_proxy_port)" |
0 | 188 } |
189 | |
190 if {$urllog_check != 0} { | |
28 | 191 putlog " (Additional URL validity checks enabled)" |
0 | 192 } |
193 | |
194 if {$urllog_verbose != 0} { | |
28 | 195 putlog " (Verbose mode enabled)" |
0 | 196 } |
197 | |
198 if {$urllog_search != 0} { | |
28 | 199 putlog " (Search commands enabled)" |
0 | 200 } |
201 | |
202 #------------------------------------------------------------------------- | |
203 ### Utility functions | |
204 proc urllog_log {arg} { | |
28 | 205 global urllog_logmsg urllog_name |
0 | 206 |
28 | 207 if {$urllog_logmsg != 0} { |
208 putlog "$urllog_name: $arg" | |
209 } | |
0 | 210 } |
211 | |
212 | |
213 proc urllog_ctime { utime } { | |
214 | |
28 | 215 if {$utime == "" || $utime == "*"} { |
216 set utime 0 | |
217 } | |
0 | 218 |
28 | 219 return [clock format $utime -format "%d.%m.%Y %H:%M"] |
0 | 220 } |
221 | |
222 | |
223 proc urllog_isnumber {uarg} { | |
224 | |
28 | 225 foreach i [split $uarg {}] { |
65
31c8c4f50aa6
urllog: Improve urllog_isnumber function.
Matti Hamalainen <ccr@tnsp.org>
parents:
62
diff
changeset
|
226 if {![string match \[0-9\] $i]} { return 0 } |
28 | 227 } |
0 | 228 |
65
31c8c4f50aa6
urllog: Improve urllog_isnumber function.
Matti Hamalainen <ccr@tnsp.org>
parents:
62
diff
changeset
|
229 return 1 |
0 | 230 } |
231 | |
232 | |
233 proc urllog_msg {apublic anick achan amsg} { | |
28 | 234 global urllog_preferredmsg |
0 | 235 |
28 | 236 if {$apublic == 1} { |
237 putserv "$urllog_preferredmsg $achan :$amsg" | |
238 } else { | |
239 putserv "$urllog_preferredmsg $anick :$amsg" | |
240 } | |
0 | 241 } |
242 | |
243 | |
244 proc urllog_verb_msg {anick achan amsg} { | |
28 | 245 global urllog_verbose |
0 | 246 |
28 | 247 if {$urllog_verbose != 0} { |
248 urllog_msg 1 $anick $achan $amsg | |
249 } | |
0 | 250 } |
251 | |
252 | |
253 proc urllog_convert_ent {udata} { | |
28 | 254 global urllog_html_ent |
106
fc50d5fd6ce8
urllog: urllog_convert_ent was incorrectly mapping entities first with -nocase, resulting in lowercase characters when uppercase was the correct one. Fixed. Also optimized a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
104
diff
changeset
|
255 regsub -all "\r|\n" $udata " " utmp |
28 | 256 regsub -all " *" $utmp " " utmp |
257 regsub -all "\t" $utmp "" utmp | |
106
fc50d5fd6ce8
urllog: urllog_convert_ent was incorrectly mapping entities first with -nocase, resulting in lowercase characters when uppercase was the correct one. Fixed. Also optimized a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
104
diff
changeset
|
258 return [string map -nocase $urllog_html_ent [string map $urllog_html_ent $utmp]] |
0 | 259 } |
260 | |
261 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
262 proc urllog_escape { str } { |
28 | 263 return [string map {' ''} $str] |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
264 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
265 |
0 | 266 #------------------------------------------------------------------------- |
267 proc urllog_get_short {utime} { | |
68 | 268 global urllog_shorturl_prefix |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
269 |
28 | 270 set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" |
271 set ulen [string length $ustr] | |
0 | 272 |
28 | 273 set u1 [expr $utime / ($ulen * $ulen)] |
274 set utmp [expr $utime % ($ulen * $ulen)] | |
275 set u2 [expr $utmp / $ulen] | |
276 set u3 [expr $utmp % $ulen] | |
0 | 277 |
28 | 278 return "\[ $urllog_shorturl_prefix[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3] \]" |
0 | 279 } |
280 | |
281 | |
282 #------------------------------------------------------------------------- | |
283 proc urllog_chop_url {url} { | |
28 | 284 global urllog_shorturl_orig |
68 | 285 |
28 | 286 if {[string length $url] > $urllog_shorturl_orig} { |
287 return "[string range $url 0 $urllog_shorturl_orig]..." | |
288 } else { | |
289 return $url | |
290 } | |
0 | 291 } |
292 | |
293 #------------------------------------------------------------------------- | |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
294 proc urllog_exists {urlStr urlNick urlHost urlChan} { |
28 | 295 global urldb urlmsg_alreadyknown urllog_shorturl |
0 | 296 |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
297 set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE url='[urllog_escape $urlStr]'" |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
298 urldb eval $usql { |
28 | 299 urllog_log "URL said by $urlNick ($urlStr) already known" |
300 if {$urllog_shorturl != 0} { | |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
301 set qstr "[urllog_get_short $uid] " |
28 | 302 } else { |
303 set qstr "" | |
304 } | |
305 append qstr "($uuser/$uchan@[urllog_ctime $utime])" | |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
306 if {[string length $utitle] > 0} { |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
307 set qstr "$urlmsg_alreadyknown - '$utitle' $qstr" |
28 | 308 } else { |
309 set qstr "$urlmsg_alreadyknown $qstr" | |
310 } | |
311 urllog_verb_msg $urlNick $urlChan $qstr | |
312 return 0 | |
313 } | |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
314 return 1 |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
315 } |
0 | 316 |
18
1e2232135354
More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents:
13
diff
changeset
|
317 |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
318 #------------------------------------------------------------------------- |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
319 proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} { |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
320 global urldb urllog_shorturl |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
321 |
93
4e02c0219afe
urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents:
92
diff
changeset
|
322 if {$urlTitle == ""} { |
4e02c0219afe
urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents:
92
diff
changeset
|
323 set uins "NULL" |
4e02c0219afe
urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents:
92
diff
changeset
|
324 } else { |
4e02c0219afe
urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents:
92
diff
changeset
|
325 set uins "'[urllog_escape $urlTitle]'" |
4e02c0219afe
urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents:
92
diff
changeset
|
326 } |
4e02c0219afe
urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents:
92
diff
changeset
|
327 set usql "INSERT INTO urls (utime,url,user,host,chan,title) VALUES ([unixtime], '[urllog_escape $urlStr]', '[urllog_escape $urlNick]', '[urllog_escape $urlHost]', '[urllog_escape $urlChan]', $uins)" |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
328 if {[catch {urldb eval $usql} uerrmsg]} { |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
329 urllog_log "$uerrmsg on SQL:\n$usql" |
28 | 330 return 0 |
331 } | |
82
1bbc79f41a1c
urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents:
81
diff
changeset
|
332 set uid [urldb last_insert_rowid] |
28 | 333 urllog_log "Added URL ($urlNick@$urlChan): $urlStr" |
0 | 334 |
335 | |
28 | 336 ### Let's say something, to confirm that everything went well. |
337 if {$urllog_shorturl != 0} { | |
82
1bbc79f41a1c
urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents:
81
diff
changeset
|
338 set qstr "[urllog_get_short $uid] " |
28 | 339 } else { |
340 set qstr "" | |
341 } | |
342 if {[string length $urlTitle] > 0} { | |
343 urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr" | |
344 } else { | |
345 urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr" | |
346 } | |
0 | 347 |
28 | 348 return 1 |
0 | 349 } |
350 | |
351 | |
352 #------------------------------------------------------------------------- | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
353 proc urllog_http_handler {utoken utotal ucurr} { |
28 | 354 upvar #0 $utoken state |
0 | 355 |
28 | 356 # Stop fetching data after 64000 bytes, this should be enough to |
357 # contain the head section of a HTML page. | |
358 if {$ucurr > 64000} { | |
359 set state(status) "ok" | |
360 } | |
0 | 361 } |
362 | |
84
fa1e95c2a0bc
urllog: Bump version to 2.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
83
diff
changeset
|
363 |
0 | 364 #------------------------------------------------------------------------- |
365 proc urllog_checkurl {urlStr urlNick urlHost urlChan} { | |
28 | 366 global urllog_tlds urllog_check urlmsg_nosuchhost urlmsg_ioerror |
367 global urlmsg_timeout urlmsg_errorgettingdoc urllog_httprep | |
368 global urllog_shorturl_prefix urllog_shorturl urllog_encoding | |
95
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
369 global http_tls_support |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
370 |
96
e5a6c27be365
urllog: Comments and cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents:
95
diff
changeset
|
371 ### Try to guess the URL protocol component (if it is missing) |
28 | 372 set u_checktld 1 |
373 if {[string match "*www.*" $urlStr] && ![string match "http://*" $urlStr] && ![string match "https://*" $urlStr]} { | |
374 set urlStr "http://$urlStr" | |
375 } elseif {[string match "*ftp.*" $urlStr] && ![string match "ftp://*" $urlStr]} { | |
376 set urlStr "ftp://$urlStr" | |
377 } | |
0 | 378 |
95
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
379 ### Handle URLs that have an IPv4-address |
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
380 if {[regexp "(\[a-z\]+)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr u_match u_proto ni1 ni2 ni3 ni4]} { |
28 | 381 # Check if the IP is on local network |
92
f6f4595856ff
urllog: Cosmetics. Remove useless parenthesis.
Matti Hamalainen <ccr@tnsp.org>
parents:
91
diff
changeset
|
382 if {$ni1 == 127 || $ni1 == 10 || ($ni1 == 192 && $ni2 == 168) || $ni1 == 0} { |
28 | 383 urllog_log "URL pointing to local or invalid network, ignored ($urlStr)." |
384 return 0 | |
385 } | |
386 # Skip TLD check for URLs with IP address | |
387 set u_checktld 0 | |
388 } | |
0 | 389 |
96
e5a6c27be365
urllog: Comments and cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents:
95
diff
changeset
|
390 ### Check now if we have a ShortURL here ... |
28 | 391 if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} { |
98
fbbe7ee40e2f
urllog: Improve one informational / error message.
Matti Hamalainen <ccr@tnsp.org>
parents:
97
diff
changeset
|
392 urllog_log "Ignoring ShortURL from $urlNick: $urlStr" |
28 | 393 return 0 |
394 } | |
0 | 395 |
95
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
396 ### Get URL protocol component |
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
397 set u_proto "" |
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
398 if {[regexp "(\[a-z\]+)://" $urlStr u_match u_proto]} { |
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
399 } |
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
400 |
28 | 401 ### Check the PORT (if the ":" is there) |
402 set u_record [split $urlStr "/"] | |
403 set u_hostname [lindex $u_record 2] | |
404 set u_port [lindex [split $u_hostname ":"] end] | |
0 | 405 |
28 | 406 if {![urllog_isnumber $u_port] && $u_port != "" && $u_port != $u_hostname} { |
407 urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $u_port" | |
408 return 0 | |
409 } | |
0 | 410 |
28 | 411 # Default to port 80 (HTTP) |
412 if {![urllog_isnumber $u_port]} { | |
413 set u_port 80 | |
414 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
415 |
28 | 416 ### Is it an http, https or ftp URL? (FIX ME!) |
97
366e68ad94df
urllog: Use u_proto variable to check for if the protocol is supported instead of doing useless additional string checking.
Matti Hamalainen <ccr@tnsp.org>
parents:
96
diff
changeset
|
417 if {$u_proto != "http" && $u_proto != "https" && $u_proto != "ftp"} { |
366e68ad94df
urllog: Use u_proto variable to check for if the protocol is supported instead of doing useless additional string checking.
Matti Hamalainen <ccr@tnsp.org>
parents:
96
diff
changeset
|
418 urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED protocol class ($u_proto)." |
28 | 419 return 0 |
420 } | |
0 | 421 |
28 | 422 ### Check the Top Level Domain (TLD) validity |
423 if {$u_checktld != 0} { | |
424 set u_sane [lindex [split $u_hostname "."] end] | |
425 set u_tld [lindex [split $u_sane ":"] 0] | |
426 set u_found 0 | |
0 | 427 |
28 | 428 if {[string length $u_tld] == 2} { |
429 # Assume all 2-letter domains to be valid :) | |
430 set u_found 1 | |
431 } else { | |
432 # Check our list of known TLDs | |
433 foreach itld $urllog_tlds { | |
434 if {[string match $itld $u_tld]} { | |
435 set u_found 1 | |
436 } | |
437 } | |
438 } | |
0 | 439 |
28 | 440 if {$u_found == 0} { |
441 urllog_log "Broken URL from $urlNick: ($urlStr) illegal TLD: $u_tld." | |
442 return 0 | |
443 } | |
444 } | |
0 | 445 |
28 | 446 set urlStr [string map $urllog_httprep $urlStr] |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
447 |
91
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
448 ### Does the URL already exist? |
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
449 if {![urllog_exists $urlStr $urlNick $urlHost $urlChan]} { |
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
450 return 1 |
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
451 } |
0 | 452 |
28 | 453 ### Do we perform additional optional checks? |
95
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
454 if {$urllog_check == 0 || ($http_tls_support == 0 && $u_proto == "https") || $u_proto != "http"} { |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
455 # No optional checks, just add the URL, if it does not exist already |
91
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
456 urllog_addurl $urlStr $urlNick $urlHost $urlChan "" |
28 | 457 return 1 |
458 } | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
459 |
28 | 460 ### Does the document pointed by the URL exist? |
461 if {[catch {set utoken [::http::geturl $urlStr -progress urllog_http_handler -blocksize 1024 -timeout 3000]} uerrmsg]} { | |
462 urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)" | |
463 urllog_log "HTTP request failed: $uerrmsg" | |
464 return 0 | |
465 } | |
0 | 466 |
28 | 467 if {[::http::status $utoken] == "timeout"} { |
468 urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout" | |
469 urllog_log "HTTP request timed out ($urlStr)" | |
470 return 0 | |
471 } | |
0 | 472 |
28 | 473 if {[::http::status $utoken] != "ok"} { |
474 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::error $utoken])" | |
475 urllog_log "Error in HTTP transaction: [::http::error $utoken] ($urlStr)" | |
476 return 0 | |
477 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
478 |
28 | 479 # Fixme! Handle redirects! |
480 set ucode [::http::ncode $utoken] | |
481 if {$ucode >= 200 && $ucode <= 309} { | |
482 set udata [::http::data $utoken] | |
483 set uconvert 0 | |
101
372b63af72b5
urllog: Improve page character set encoding detection/guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
99
diff
changeset
|
484 if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/>" $udata umatches uencoding]} { |
28 | 485 if {[string length $uencoding] > 3} { |
101
372b63af72b5
urllog: Improve page character set encoding detection/guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
99
diff
changeset
|
486 set uencoding [string tolower $uencoding] |
372b63af72b5
urllog: Improve page character set encoding detection/guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
99
diff
changeset
|
487 regsub -- "iso-" $uencoding "iso" uencoding |
28 | 488 set uconvert 1 |
489 } | |
490 } | |
86
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
491 if {$uconvert == 0} { |
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
492 set uencoding "iso8859-1" |
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
493 } |
0 | 494 |
28 | 495 set umatches [regexp -nocase -inline -- "<title>(.\*\?)</title>" $udata] |
496 if {[llength $umatches] > 0} { | |
497 set urlTitle [lindex $umatches 1] | |
86
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
498 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} { |
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
499 urllog_log "Error in charset conversion: $cerrmsg" |
28 | 500 } |
501 set urlTitle [urllog_convert_ent $urlTitle] | |
502 regsub -all "(^ *| *$)" $urlTitle "" urlTitle | |
503 } else { | |
504 set urlTitle "" | |
505 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
506 |
28 | 507 # Rasiatube hack |
508 if {[string match "*/rasiatube/view*" $urlStr]} { | |
509 set rasia 0 | |
510 set umatches [regexp -nocase -inline -- "<link rel=\"video_src\"\.\*\?file=(http://\[^&\]+)&" $udata] | |
511 if {[llength $umatches] > 0} { | |
512 set urlStr [lindex $umatches 1] | |
513 regsub -all "\/v\/" $urlStr "\/watch\?v=" urlStr | |
514 set rasia 1 | |
515 } else { | |
516 set umatches [regexp -nocase -inline -- "SWFObject.\"(\[^\"\]+)\", *\"flashvideo" $udata] | |
517 if {[llength $umatches] > 0} { | |
518 set urlStr [lindex $umatches 1] | |
519 regsub "http:\/\/www.dailymotion.com\/swf\/" $urlStr "http:\/\/www.dailymotion.com\/video\/" urlStr | |
520 set rasia 1 | |
521 } | |
522 } | |
523 | |
524 if {$rasia != 0} { | |
525 urllog_log "RasiaTube mangler: $urlStr" | |
526 urllog_verb_msg $urlNick $urlChan "Korjataan haiseva rasiatube-linkki: $urlStr" | |
527 } | |
528 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
529 |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
530 # Check if the URL already exists, just in case we had some redirects |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
531 if {[urllog_exists $urlStr $urlNick $urlHost $urlChan]} { |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
532 urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
533 } |
28 | 534 return 1 |
535 } else { | |
536 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::code $utoken])" | |
537 urllog_log "[::http::code $utoken] - $urlStr" | |
538 } | |
0 | 539 |
28 | 540 ::http::cleanup $utoken |
0 | 541 } |
542 | |
543 | |
544 #------------------------------------------------------------------------- | |
proc urllog_checkmsg {unick uhost uhand uchan utext} {
    # Scan one message for words that look like URLs and pass each such
    # word to urllog_checkurl.
    #
    # Arguments:
    #   unick - nick of the sender; a literal "*" is rejected and logged
    #   uhost - sender host, passed through to urllog_checkurl
    #   uhand - sender handle (not used by this proc)
    #   uchan - channel, passed through to urllog_checkurl
    #   utext - message text, split on single spaces
    #
    # Always returns 0; the result of urllog_checkurl is ignored.

    ### Check the nick
    if {$unick == "*"} {
        urllog_log "urllog_checkmsg: nick was wc, this should not happen."
        return 0
    }

    ### Do the URL checking
    # The pattern is brace-quoted on purpose. Inside double quotes Tcl drops
    # the backslash of "\." before the regexp engine sees it, which turns the
    # intended literal dot into "any character" and lets words such as
    # "awwwww" through as if they were URLs.
    set urlPattern {(ftp|http|https)://|www\..+|ftp\..*}
    foreach str [split $utext " "] {
        if {[regexp -- $urlPattern $str]} {
            urllog_checkurl $str $unick $uhost $uchan
        }
    }

    return 0
}
561 | |
562 | |
563 #------------------------------------------------------------------------- | |
564 ### Parse arguments, find and show the results | |
proc urllog_find {unick uhand uchan utext upublic} {
    # Build a query against the "urls" table from the search terms in
    # $utext and report the newest matches through urllog_msg.
    #
    # Arguments:
    #   unick   - nick of the requester (logged, passed to urllog_msg)
    #   uhand   - handle of the requester (only used in the log line)
    #   uchan   - channel, passed through to urllog_msg
    #   utext   - search terms separated by single spaces; the first
    #             character of each term selects how it is used:
    #               -term  url must NOT contain "term"
    #               +term  url must contain "term"
    #               %term  user must match "term" (LIKE, no wildcards added)
    #               @term  accepted, currently ignored
    #               term   url must contain "term"
    #   upublic - 0 caps the result count at 5, any other value at 3; also
    #             passed through to urllog_msg
    #
    # Always returns 0.
    global urllog_shorturl urldb
    global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch

    # NOTE(review): urllog_showmax_pub / urllog_showmax_priv are declared
    # global above but are not read here; the limits are hard-coded.
    if {$upublic == 0} {
        set ulimit 5
    } else {
        set ulimit 3
    }

    ### Parse the given command
    urllog_log "$unick/$uhand searched URL: $utext"

    set ftokens [split $utext " "]
    set fpatlist ""
    foreach ftoken $ftokens {
        set fprefix [string range $ftoken 0 0]
        set fpattern [string range $ftoken 1 end]

        if {$fprefix == "-"} {
            lappend fpatlist "url NOT LIKE '%[urllog_escape $fpattern]%'"
        } elseif {$fprefix == "%"} {
            lappend fpatlist "user LIKE '[urllog_escape $fpattern]'"
        } elseif {$fprefix == "@"} {
            # Recognised prefix without an implementation yet; the term
            # adds no condition to the query.
        } elseif {$fprefix == "+"} {
            # This test used to read an unset variable named "prefix",
            # which raised an error for every plain or "+" search term.
            lappend fpatlist "url LIKE '%[urllog_escape $fpattern]%'"
        } else {
            # No known prefix: the whole token is the pattern.
            lappend fpatlist "url LIKE '%[urllog_escape $ftoken]%'"
        }
    }

    if {[llength $fpatlist] > 0} {
        set fquery "WHERE [join $fpatlist " AND "]"
    } else {
        set fquery ""
    }

    set iresults 0
    set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls $fquery ORDER BY utime DESC LIMIT $ulimit"
    # The script below runs once per result row; it reads the column
    # aliases of the SELECT (uid, utime, uurl, uuser) as local variables.
    urldb eval $usql {
        incr iresults
        set shortURL $uurl
        if {$urllog_shorturl != 0 && $uid != ""} {
            set shortURL "$shortURL [urllog_get_short $uid]"
        }
        urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[urllog_ctime $utime])"
    }

    if {$iresults == 0} {
        # If no URLs were found
        urllog_msg $upublic $unick $uchan $urlmsg_nomatch
    }

    return 0
}
621 | |
622 | |
623 #------------------------------------------------------------------------- | |
624 ### Finding binded functions | |
proc urllog_pub_urlfind {unick uhost uhand uchan utext} {
    # Entry point for the URL search issued on a channel: forwards the
    # request to urllog_find with the upublic flag set to 1.
    # uhost is accepted but not used. Always returns 0.
    set isPublic 1
    urllog_find $unick $uhand $uchan $utext $isPublic
    return 0
}
629 | |
630 | |
proc urllog_msg_urlfind {unick uhost uhand utext} {
    # Entry point for the URL search issued without a channel argument:
    # forwards the request to urllog_find with an empty channel name and
    # the upublic flag set to 0.
    # uhost is accepted but not used. Always returns 0.
    set noChannel ""
    set isPublic 0
    urllog_find $unick $uhand $noChannel $utext $isPublic
    return 0
}
0 | 635 |
636 # end of script |