Mercurial > hg > egg-tcls
annotate urllog.tcl @ 83:f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sun, 11 Sep 2011 19:39:30 +0300 |
parents | 1bbc79f41a1c |
children | fa1e95c2a0bc |
rev | line source |
---|---|
0 | 1 ########################################################################## |
2 # | |
49 | 3 # URLLog v2.0.1 by ccr/TNSP <ccr@tnsp.org> |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
4 # (C) Copyright 2000-2011 Tecnic Software productions (TNSP) |
0 | 5 # |
6 ########################################################################## | |
7 # | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
8 # URL-logger script for EggDrop IRC robot, utilizing SQLite3 database |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
9 # This script requires SQLite TCL extension. Under Debian, you need: |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
10 # tcl8.5 libsqlite3-tcl (and eggdrop eggdrop-data, of course) |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
11 # |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
12 # NOTICE! If you are upgrading to URLLog v2.0+ from any 1.x version, you |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
13 # may want to run a conversion script against your URL-database file, |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
14 # if you wish to preserve the old data. |
0 | 15 # |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
16 # See convert_urllog_db.tcl for more information. |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
17 # |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
18 # If you are doing a fresh install, you will need to create the |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
19 # initial SQLite3 database with the required table schemas. You |
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
20 # can do that by running: create_urllog_db.tcl |
0 | 21 # |
22 ########################################################################## | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
23 |
0 | 24 ### |
25 ### HTTP options | |
26 ### | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
27 # Set to 1 if you want to enable use of HTTP proxy. |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
28 # If you do, you MUST set the proxy settings below too. |
0 | 29 set http_proxy 0 |
30 | |
31 # Proxy host and port number (only used if enabled above) | |
32 set http_proxy_host "" | |
33 set http_proxy_port 8080 | |
34 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
35 # Enable _experimental_ TLS/SSL support. This may not work at all. |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
36 # If unsure, leave this option disabled (0). |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
37 set http_tls_support 0 |
0 | 38 |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
39 |
0 | 40 ### |
41 ### General options | |
42 ### | |
43 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
44 # Filename of the SQLite URL database file |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
45 set urllog_db_file "urllog.sqlite" |
0 | 46 |
47 | |
48 # 1 = Verbose: Say messages when URL is OK, bad, etc. | |
49 # 0 = Quiet : Be quiet (only speak if asked with !urlfind, etc) | |
50 set urllog_verbose 1 | |
51 | |
52 | |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
53 # 1 = Enable logging of various script actions into bot's log |
0 | 54 # 0 = Don't. |
55 set urllog_logmsg 1 | |
56 | |
57 | |
58 # 1 = Check URLs for validity and existence before adding. | |
59 # 0 = No checks. Add _anything_ that looks like an URL to the database. | |
60 set urllog_check 1 | |
61 | |
62 | |
63 ### | |
64 ### Search related settings | |
65 ### | |
66 | |
67 # 0 = No search-commands available | |
68 # 1 = Search enabled | |
69 set urllog_search 1 | |
70 | |
71 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
72 # Limit how many URLs should the "!urlfind" command show at most. |
0 | 73 set urllog_showmax_pub 3 |
74 | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
75 # Same as above, but for private message search. |
0 | 76 set urllog_showmax_priv 6 |
77 | |
78 | |
79 ### | |
80 ### ShortURL-settings | |
81 ### | |
82 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
83 # 1 = Enable showing of ShortURLs |
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
84 # 0 = ShortURLs not shown in any bot actions |
0 | 85 set urllog_shorturl 1 |
86 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
87 # Max length of original URL to be shown, rest is chopped |
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
88 # off if the URL is longer than the specified amount. |
0 | 89 set urllog_shorturl_orig 30 |
90 | |
73
646b2fd67312
urllog: Improve documentation of different settings.
Matti Hamalainen <ccr@tnsp.org>
parents:
70
diff
changeset
|
91 # Web server URL that handles redirects of ShortURLs |
0 | 92 set urllog_shorturl_prefix "http://tnsp.org/u/" |
93 | |
94 | |
95 ### | |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
96 ### Message texts (informal, errors, etc.) |
0 | 97 ### |
98 | |
99 # No such host was found | |
100 set urlmsg_nosuchhost "ei tommosta oo!" | |
101 | |
102 # Could not connect host (I/O errors etc) | |
103 set urlmsg_ioerror "kraak, virhe yhdynnässä." | |
104 | |
105 # HTTP timeout | |
106 set urlmsg_timeout "ei jaksa ootella" | |
107 | |
108 # No such document was found | |
109 set urlmsg_errorgettingdoc "siitosvirhe" | |
110 | |
111 # URL was already known (was in database) | |
112 set urlmsg_alreadyknown "wanha!" | |
113 #set urlmsg_alreadyknown "Empiiristen havaintojen perusteella ja tällä sovellutusalueella esiintyneisiin aikaisempiin kontekstuaalisiin ilmaisuihin viitaten uskallan todeta, että sovellukseen ilmoittamasi tietoverkko-osoite oli kronologisti ajatellen varsin postpresentuaalisesti sopimaton ja ennestään hyvin tunnettu." | |
114 | |
115 # No match was found when searched with !urlfind or other command | |
116 set urlmsg_nomatch "Ei osumia." | |
117 | |
118 | |
119 ### | |
120 ### Things that you usually don't need to touch ... | |
121 ### | |
122 | |
123 # What IRC "command" should we use to send messages: | |
124 # (Valid alternatives are "PRIVMSG" and "NOTICE") | |
125 set urllog_preferredmsg "PRIVMSG" | |
126 | |
127 # The valid known Top Level Domains (TLDs), but not the country code TLDs | |
128 # (Now includes the new IANA published TLDs) | |
129 set urllog_tlds "org,com,net,mil,gov,biz,edu,coop,aero,info,museum,name,pro,int" | |
130 | |
131 | |
132 ########################################################################## | |
133 # No need to look below this line | |
134 ########################################################################## | |
135 set urllog_name "URLLog" | |
49 | 136 set urllog_version "2.0.1" |
0 | 137 |
138 set urllog_tlds [split $urllog_tlds ","] | |
139 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] | |
140 | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
141 set urllog_html_ent [split "‏||—|-|‪||‬||‎||å|å|Å|Å|é|é|:|:|ä|ä|ö|ö|ä|ä|ö|ö| | |-|-|”|\"|“|\"|»|>>|"|\"|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>|ä|ä|ö|ö|Ä|Ä" "|"] |
0 | 142 |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
143 ### Require packages |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
144 package require sqlite3 |
0 | 145 package require http |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
146 |
0 | 147 ### Binding initializations |
148 if {$urllog_search != 0} { | |
28 | 149 bind pub - !urlfind urllog_pub_urlfind |
150 bind msg - urlfind urllog_msg_urlfind | |
0 | 151 } |
152 | |
153 bind pubm - *.* urllog_checkmsg | |
154 bind topc - *.* urllog_checkmsg | |
155 | |
156 | |
157 ### Initialization messages | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
158 set urllog_message "$urllog_name v$urllog_version (C) 2000-2011 ccr/TNSP" |
0 | 159 putlog "$urllog_message" |
160 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
161 ### HTTP module initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
162 ::http::config -useragent "$urllog_name/$urllog_version" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
163 if {$http_proxy != 0} { |
28 | 164 ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
165 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
166 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
167 if {$http_tls_support != 0} { |
28 | 168 package require tls |
169 ::http::register https 443 [list ::tls::socket -request 1 -require 1 -cadir "/etc/certs/"] | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
170 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
171 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
172 ### SQLite database initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
173 if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} { |
28 | 174 putlog " Could not open SQLite3 database '$urllog_db_file': $uerrmsg" |
175 exit 2 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
176 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
177 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
178 |
0 | 179 if {$http_proxy != 0} { |
28 | 180 putlog " (Using proxy $http_proxy_host:$http_proxy_port)" |
0 | 181 } |
182 | |
183 if {$urllog_check != 0} { | |
28 | 184 putlog " (Additional URL validity checks enabled)" |
0 | 185 } |
186 | |
187 if {$urllog_verbose != 0} { | |
28 | 188 putlog " (Verbose mode enabled)" |
0 | 189 } |
190 | |
191 if {$urllog_search != 0} { | |
28 | 192 putlog " (Search commands enabled)" |
0 | 193 } |
194 | |
195 #------------------------------------------------------------------------- | |
196 ### Utility functions | |
197 proc urllog_log {arg} { | |
28 | 198 global urllog_logmsg urllog_name |
0 | 199 |
28 | 200 if {$urllog_logmsg != 0} { |
201 putlog "$urllog_name: $arg" | |
202 } | |
0 | 203 } |
204 | |
205 | |
206 proc urllog_ctime { utime } { | |
207 | |
28 | 208 if {$utime == "" || $utime == "*"} { |
209 set utime 0 | |
210 } | |
0 | 211 |
28 | 212 return [clock format $utime -format "%d.%m.%Y %H:%M"] |
0 | 213 } |
214 | |
215 | |
216 proc urllog_isnumber {uarg} { | |
217 | |
28 | 218 foreach i [split $uarg {}] { |
65
31c8c4f50aa6
urllog: Improve urllog_isnumber function.
Matti Hamalainen <ccr@tnsp.org>
parents:
62
diff
changeset
|
219 if {![string match \[0-9\] $i]} { return 0 } |
28 | 220 } |
0 | 221 |
65
31c8c4f50aa6
urllog: Improve urllog_isnumber function.
Matti Hamalainen <ccr@tnsp.org>
parents:
62
diff
changeset
|
222 return 1 |
0 | 223 } |
224 | |
225 | |
226 proc urllog_msg {apublic anick achan amsg} { | |
28 | 227 global urllog_preferredmsg |
0 | 228 |
28 | 229 if {$apublic == 1} { |
230 putserv "$urllog_preferredmsg $achan :$amsg" | |
231 } else { | |
232 putserv "$urllog_preferredmsg $anick :$amsg" | |
233 } | |
0 | 234 } |
235 | |
236 | |
237 proc urllog_verb_msg {anick achan amsg} { | |
28 | 238 global urllog_verbose |
0 | 239 |
28 | 240 if {$urllog_verbose != 0} { |
241 urllog_msg 1 $anick $achan $amsg | |
242 } | |
0 | 243 } |
244 | |
245 | |
246 proc urllog_convert_ent {udata} { | |
28 | 247 global urllog_html_ent |
248 regsub -all " " $udata " " utmp | |
249 regsub -all "\r" $udata " " utmp | |
250 regsub -all "\n" $utmp " " utmp | |
251 regsub -all " *" $utmp " " utmp | |
252 regsub -all "\t" $utmp "" utmp | |
253 return [string map -nocase $urllog_html_ent $utmp] | |
0 | 254 } |
255 | |
256 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
257 proc urllog_escape { str } { |
28 | 258 return [string map {' ''} $str] |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
259 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
260 |
0 | 261 #------------------------------------------------------------------------- |
262 proc urllog_get_short {utime} { | |
68 | 263 global urllog_shorturl_prefix |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
264 |
28 | 265 set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" |
266 set ulen [string length $ustr] | |
0 | 267 |
28 | 268 set u1 [expr $utime / ($ulen * $ulen)] |
269 set utmp [expr $utime % ($ulen * $ulen)] | |
270 set u2 [expr $utmp / $ulen] | |
271 set u3 [expr $utmp % $ulen] | |
0 | 272 |
28 | 273 return "\[ $urllog_shorturl_prefix[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3] \]" |
0 | 274 } |
275 | |
276 | |
277 #------------------------------------------------------------------------- | |
278 proc urllog_chop_url {url} { | |
28 | 279 global urllog_shorturl_orig |
68 | 280 |
28 | 281 if {[string length $url] > $urllog_shorturl_orig} { |
282 return "[string range $url 0 $urllog_shorturl_orig]..." | |
283 } else { | |
284 return $url | |
285 } | |
0 | 286 } |
287 | |
288 #------------------------------------------------------------------------- | |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
289 proc urllog_exists {urlStr urlNick urlHost urlChan} { |
28 | 290 global urldb urlmsg_alreadyknown urllog_shorturl |
0 | 291 |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
292 set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE url='[urllog_escape $urlStr]'" |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
293 urldb eval $usql { |
28 | 294 urllog_log "URL said by $urlNick ($urlStr) already known" |
295 if {$urllog_shorturl != 0} { | |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
296 set qstr "[urllog_get_short $uid] " |
28 | 297 } else { |
298 set qstr "" | |
299 } | |
300 append qstr "($uuser/$uchan@[urllog_ctime $utime])" | |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
301 if {[string length $utitle] > 0} { |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
302 set qstr "$urlmsg_alreadyknown - '$utitle' $qstr" |
28 | 303 } else { |
304 set qstr "$urlmsg_alreadyknown $qstr" | |
305 } | |
306 urllog_verb_msg $urlNick $urlChan $qstr | |
307 return 0 | |
308 } | |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
309 return 1 |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
310 } |
0 | 311 |
18
1e2232135354
More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents:
13
diff
changeset
|
312 |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
313 #------------------------------------------------------------------------- |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
314 proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} { |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
315 global urldb urllog_shorturl |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
316 |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
317 set usql "INSERT INTO urls (utime,url,user,host,chan,title) VALUES ([unixtime], '[urllog_escape $urlStr]', '[urllog_escape $urlNick]', '[urllog_escape $urlHost]', '[urllog_escape $urlChan]', '[urllog_escape $urlTitle]')" |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
318 if {[catch {urldb eval $usql} uerrmsg]} { |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
319 urllog_log "$uerrmsg on SQL:\n$usql" |
28 | 320 return 0 |
321 } | |
82
1bbc79f41a1c
urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents:
81
diff
changeset
|
322 set uid [urldb last_insert_rowid] |
28 | 323 urllog_log "Added URL ($urlNick@$urlChan): $urlStr" |
0 | 324 |
325 | |
28 | 326 ### Let's say something, to confirm that everything went well. |
327 if {$urllog_shorturl != 0} { | |
82
1bbc79f41a1c
urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents:
81
diff
changeset
|
328 set qstr "[urllog_get_short $uid] " |
28 | 329 } else { |
330 set qstr "" | |
331 } | |
332 if {[string length $urlTitle] > 0} { | |
333 urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr" | |
334 } else { | |
335 urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr" | |
336 } | |
0 | 337 |
28 | 338 return 1 |
0 | 339 } |
340 | |
341 | |
342 #------------------------------------------------------------------------- | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
343 proc urllog_http_handler {utoken utotal ucurr} { |
28 | 344 upvar #0 $utoken state |
0 | 345 |
28 | 346 # Stop fetching data after 3000 bytes, this should be enough to |
347 # contain the head section of a HTML page. | |
348 if {$ucurr > 64000} { | |
349 set state(status) "ok" | |
350 } | |
0 | 351 } |
352 | |
353 #------------------------------------------------------------------------- | |
354 proc urllog_checkurl {urlStr urlNick urlHost urlChan} { | |
28 | 355 global urllog_tlds urllog_check urlmsg_nosuchhost urlmsg_ioerror |
356 global urlmsg_timeout urlmsg_errorgettingdoc urllog_httprep | |
357 global urllog_shorturl_prefix urllog_shorturl urllog_encoding | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
358 |
28 | 359 ### Print status to bot's log |
360 urllog_log "$urlStr ($urlNick@$urlChan)" | |
0 | 361 |
28 | 362 ### Try to determine the URL protocol component (if it is missing) |
363 set u_checktld 1 | |
364 if {[string match "*www.*" $urlStr] && ![string match "http://*" $urlStr] && ![string match "https://*" $urlStr]} { | |
365 set urlStr "http://$urlStr" | |
366 } elseif {[string match "*ftp.*" $urlStr] && ![string match "ftp://*" $urlStr]} { | |
367 set urlStr "ftp://$urlStr" | |
368 } | |
0 | 369 |
28 | 370 if {[regexp "(ftp|http|https)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr u_match u_prefix ni1 ni2 ni3 ni4]} { |
371 # Check if the IP is on local network | |
372 if {($ni1 == 127) || ($ni1 == 10) || ($ni1 == 192 && $ni2 == 168) || ($ni1 == 0)} { | |
373 urllog_log "URL pointing to local or invalid network, ignored ($urlStr)." | |
374 return 0 | |
375 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
376 |
28 | 377 # Skip TLD check for URLs with IP address |
378 set u_checktld 0 | |
379 } | |
0 | 380 |
28 | 381 if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} { |
382 urllog_log "Ignoring ShortURL." | |
383 return 0 | |
384 } | |
0 | 385 |
28 | 386 ### Check the PORT (if the ":" is there) |
387 set u_record [split $urlStr "/"] | |
388 set u_hostname [lindex $u_record 2] | |
389 set u_port [lindex [split $u_hostname ":"] end] | |
0 | 390 |
28 | 391 if {![urllog_isnumber $u_port] && $u_port != "" && $u_port != $u_hostname} { |
392 urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $u_port" | |
393 return 0 | |
394 } | |
0 | 395 |
28 | 396 # Default to port 80 (HTTP) |
397 if {![urllog_isnumber $u_port]} { | |
398 set u_port 80 | |
399 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
400 |
28 | 401 ### Is it a http or ftp url? (FIX ME!) |
402 if {[string range $urlStr 0 3] != "http" && [string range $urlStr 0 2] != "ftp"} { | |
403 urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED TYPE (not HTTP or FTP)" | |
404 return 0 | |
405 } | |
0 | 406 |
28 | 407 ### Check the Top Level Domain (TLD) validity |
408 if {$u_checktld != 0} { | |
409 set u_sane [lindex [split $u_hostname "."] end] | |
410 set u_tld [lindex [split $u_sane ":"] 0] | |
411 set u_found 0 | |
0 | 412 |
28 | 413 if {[string length $u_tld] == 2} { |
414 # Assume all 2-letter domains to be valid :) | |
415 set u_found 1 | |
416 } else { | |
417 # Check our list of known TLDs | |
418 foreach itld $urllog_tlds { | |
419 if {[string match $itld $u_tld]} { | |
420 set u_found 1 | |
421 } | |
422 } | |
423 } | |
0 | 424 |
28 | 425 if {$u_found == 0} { |
426 urllog_log "Broken URL from $urlNick: ($urlStr) illegal TLD: $u_tld." | |
427 return 0 | |
428 } | |
429 } | |
0 | 430 |
28 | 431 set urlStr [string map $urllog_httprep $urlStr] |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
432 |
0 | 433 |
28 | 434 ### Do we perform additional optional checks? |
435 if {$urllog_check == 0 || [string range $urlStr 0 4] != "http:"} { | |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
436 # No optional checks, just add the URL, if it does not exist already |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
437 if {[urllog_exists $urlStr $urlNick $urlHost $urlChan]} { |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
438 urllog_addurl $urlStr $urlNick $urlHost $urlChan "" |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
439 } |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
440 return 1 |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
441 } |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
442 |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
443 ### Does the URL already exist? |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
444 if {![urllog_exists $urlStr $urlNick $urlHost $urlChan]} { |
28 | 445 return 1 |
446 } | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
447 |
28 | 448 ### Does the document pointed by the URL exist? |
449 if {[catch {set utoken [::http::geturl $urlStr -progress urllog_http_handler -blocksize 1024 -timeout 3000]} uerrmsg]} { | |
450 urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)" | |
451 urllog_log "HTTP request failed: $uerrmsg" | |
452 return 0 | |
453 } | |
0 | 454 |
28 | 455 if {[::http::status $utoken] == "timeout"} { |
456 urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout" | |
457 urllog_log "HTTP request timed out ($urlStr)" | |
458 return 0 | |
459 } | |
0 | 460 |
28 | 461 if {[::http::status $utoken] != "ok"} { |
462 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::error $utoken])" | |
463 urllog_log "Error in HTTP transaction: [::http::error $utoken] ($urlStr)" | |
464 return 0 | |
465 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
466 |
28 | 467 # Fixme! Handle redirects! |
468 set ucode [::http::ncode $utoken] | |
469 if {$ucode >= 200 && $ucode <= 309} { | |
470 set udata [::http::data $utoken] | |
471 set umatches [regexp -nocase -inline -- "<meta.\*\?content=\".\*\?charset=(\[^\"\]*)\"/>" $udata] | |
472 set uconvert 0 | |
473 if {[llength $umatches] > 0} { | |
474 set uencoding [lindex $umatches 1] | |
475 if {[string length $uencoding] > 3} { | |
476 set uconvert 1 | |
477 } | |
478 } | |
0 | 479 |
28 | 480 set umatches [regexp -nocase -inline -- "<title>(.\*\?)</title>" $udata] |
481 if {[llength $umatches] > 0} { | |
482 set urlTitle [lindex $umatches 1] | |
483 if {$uconvert != 0} { | |
484 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} { | |
485 urllog_log "Error in charset conversion: $cerrmsg" | |
486 } | |
487 } | |
488 set urlTitle [urllog_convert_ent $urlTitle] | |
489 regsub -all "(^ *| *$)" $urlTitle "" urlTitle | |
490 } else { | |
491 set urlTitle "" | |
492 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
493 |
28 | 494 # Rasiatube hack |
495 if {[string match "*/rasiatube/view*" $urlStr]} { | |
496 set rasia 0 | |
497 set umatches [regexp -nocase -inline -- "<link rel=\"video_src\"\.\*\?file=(http://\[^&\]+)&" $udata] | |
498 if {[llength $umatches] > 0} { | |
499 set urlStr [lindex $umatches 1] | |
500 regsub -all "\/v\/" $urlStr "\/watch\?v=" urlStr | |
501 set rasia 1 | |
502 } else { | |
503 set umatches [regexp -nocase -inline -- "SWFObject.\"(\[^\"\]+)\", *\"flashvideo" $udata] | |
504 if {[llength $umatches] > 0} { | |
505 set urlStr [lindex $umatches 1] | |
506 regsub "http:\/\/www.dailymotion.com\/swf\/" $urlStr "http:\/\/www.dailymotion.com\/video\/" urlStr | |
507 set rasia 1 | |
508 } | |
509 } | |
510 | |
511 if {$rasia != 0} { | |
512 urllog_log "RasiaTube mangler: $urlStr" | |
513 urllog_verb_msg $urlNick $urlChan "Korjataan haiseva rasiatube-linkki: $urlStr" | |
514 } | |
515 } | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
516 |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
517 # Check if the URL already exists, just in case we had some redirects |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
518 if {[urllog_exists $urlStr $urlNick $urlHost $urlChan]} { |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
519 urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
520 } |
28 | 521 return 1 |
522 } else { | |
523 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::code $utoken])" | |
524 urllog_log "[::http::code $utoken] - $urlStr" | |
525 } | |
0 | 526 |
28 | 527 ::http::cleanup $utoken |
0 | 528 } |
529 | |
530 | |
531 #------------------------------------------------------------------------- | |
532 proc urllog_checkmsg {nick uhost hand chan text} { | |
28 | 533 ### Check the nick |
534 if {$nick == "*"} { | |
535 urllog_log "urllog_checkmsg: nick was wc, this should not happen." | |
536 return 0 | |
537 } | |
0 | 538 |
28 | 539 ### Do the URL checking |
540 foreach istr [split $text " "] { | |
541 if {[regexp "(ftp|http|https)://|www\..+|ftp\..*" $istr]} { | |
542 urllog_checkurl $istr $nick $uhost $chan | |
543 } | |
544 } | |
0 | 545 |
28 | 546 return 0 |
0 | 547 } |
548 | |
549 | |
550 #------------------------------------------------------------------------- | |
551 ### Parse arguments, find and show the results | |
552 proc urllog_find {unick uhand uchan utext upublic} { | |
62
6428b1bcb34b
urllog: Remove some global variable references where they are not used.
Matti Hamalainen <ccr@tnsp.org>
parents:
50
diff
changeset
|
553 global urllog_shorturl urldb |
28 | 554 global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch |
0 | 555 |
28 | 556 if {$upublic == 0} { |
557 set ulimit 5 | |
558 } else { | |
559 set ulimit 3 | |
560 } | |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
561 |
28 | 562 ### Parse the given command |
563 urllog_log "$unick/$uhand searched URL: $utext" | |
0 | 564 |
28 | 565 set ftokens [split $utext " "] |
566 set fpatlist "" | |
567 foreach ftoken $ftokens { | |
568 set fprefix [string range $ftoken 0 0] | |
569 set fpattern [string range $ftoken 1 end] | |
0 | 570 |
28 | 571 if {$fprefix == "-"} { |
572 lappend fpatlist "url NOT LIKE '%[urllog_escape $fpattern]%'" | |
573 } elseif {$fprefix == "%"} { | |
574 lappend fpatlist "user='[urllog_escape $fpattern]'" | |
575 } elseif {$fprefix == "@"} { | |
576 # foo | |
577 } else { | |
578 lappend fpatlist "url LIKE '%[urllog_escape $ftoken]%'" | |
579 } | |
580 } | |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
581 |
27
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
582 if {[llength $fpatlist] > 0} { |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
583 set fquery "WHERE [join $fpatlist " AND "]" |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
584 } else { |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
585 set fquery "" |
6e381916b016
Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents:
20
diff
changeset
|
586 } |
68 | 587 |
28 | 588 set iresults 0 |
82
1bbc79f41a1c
urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents:
81
diff
changeset
|
589 set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls $fquery ORDER BY utime DESC LIMIT $ulimit" |
68 | 590 urldb eval $usql { |
28 | 591 incr iresults |
592 set shortURL $uurl | |
82
1bbc79f41a1c
urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents:
81
diff
changeset
|
593 if {$urllog_shorturl != 0 && $uid != ""} { |
1bbc79f41a1c
urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents:
81
diff
changeset
|
594 set shortURL "$shortURL [urllog_get_short $uid]" |
28 | 595 } |
596 urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[urllog_ctime $utime])" | |
597 } | |
598 | |
599 if {$iresults == 0} { | |
600 # If no URLs were found | |
601 urllog_msg $upublic $unick $uchan $urlmsg_nomatch | |
602 } | |
0 | 603 |
28 | 604 return 0 |
0 | 605 } |
606 | |
607 | |
608 #------------------------------------------------------------------------- | |
609 ### Finding binded functions | |
610 proc urllog_pub_urlfind {unick uhost uhand uchan utext} { | |
28 | 611 urllog_find $unick $uhand $uchan $utext 1 |
612 return 0 | |
0 | 613 } |
614 | |
615 | |
616 proc urllog_msg_urlfind {unick uhost uhand utext} { | |
28 | 617 urllog_find $unick $uhand "" $utext 0 |
618 return 0 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
619 } |
0 | 620 |
621 # end of script |