Mercurial > hg > egg-tcls
annotate urllog.tcl @ 19:9cf22053e5da
Repair !urlfind functionality.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 05 Sep 2011 16:56:13 +0300 |
parents | 1e2232135354 |
children | 45b76d81e256 |
rev | line source |
---|---|
0 | 1 ########################################################################## |
2 # | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
3 # URLLog v2.00.0 by ccr/TNSP <ccr@tnsp.org> |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
4 # (C) Copyright 2000-2011 Tecnic Software productions (TNSP) |
0 | 5 # |
6 ########################################################################## | |
7 # | |
8 # NOTICE! If you are upgrading to v1.90+ from any older version, you | |
9 # might want to run a conversion script against your URL-database file. | |
10 # | |
11 # It is NOT strictly necessary, but recommended especially if you | |
12 # utilize the "shorturl" functionality. The conversion script is | |
13 # available at < http://tnsp.org/egg-tcls/ > | |
14 # | |
15 ########################################################################## | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
16 |
0 | 17 ### |
18 ### HTTP options | |
19 ### | |
20 # Set to 1 if you want to use proxy | |
21 set http_proxy 0 | |
22 | |
23 # Proxy host and port number (only used if enabled above) | |
24 set http_proxy_host "" | |
25 set http_proxy_port 8080 | |
26 | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
27 set http_tls_support 0 |
0 | 28 |
29 ### | |
30 ### General options | |
31 ### | |
32 | |
33 # Filename where the logged URL data goes | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
34 set urllog_db_file "urllog.sqlite" |
0 | 35 |
36 | |
37 # 1 = Verbose: Say messages when URL is OK, bad, etc. | |
38 # 0 = Quiet : Be quiet (only speak if asked with !urlfind, etc) | |
39 set urllog_verbose 1 | |
40 | |
41 | |
42 # 1 = Put some info to bot's Logfile during operation | |
43 # 0 = Don't. | |
44 set urllog_logmsg 1 | |
45 | |
46 | |
47 # 1 = Check URLs for validity and existence before adding. | |
48 # 0 = No checks. Add _anything_ that looks like an URL to the database. | |
49 set urllog_check 1 | |
50 | |
51 | |
52 ### | |
53 ### Search related settings | |
54 ### | |
55 | |
56 # 0 = No search-commands available | |
57 # 1 = Search enabled | |
58 set urllog_search 1 | |
59 | |
60 | |
61 # How many URL's should the !urlfind command show (maximum limit) | |
62 set urllog_showmax_pub 3 | |
63 | |
64 | |
65 # For private-search, this is the default limit (user can change it) | |
66 set urllog_showmax_priv 6 | |
67 | |
68 | |
69 ### | |
70 ### ShortURL-settings | |
71 ### | |
72 | |
73 # 1 = Use ShortURLs | |
74 # 0 = Don't. | |
75 set urllog_shorturl 1 | |
76 | |
77 # Max length of original URL to be shown | |
78 set urllog_shorturl_orig 30 | |
79 | |
80 # Path to PHP/CGI-script that redirects ShortURLs | |
81 set urllog_shorturl_prefix "http://tnsp.org/u/" | |
82 | |
83 | |
84 ### | |
85 ### Message-texts | |
86 ### | |
87 | |
88 # No such host was found | |
89 set urlmsg_nosuchhost "ei tommosta oo!" | |
90 | |
91 # Could not connect host (I/O errors etc) | |
92 set urlmsg_ioerror "kraak, virhe yhdynnässä." | |
93 | |
94 # HTTP timeout | |
95 set urlmsg_timeout "ei jaksa ootella" | |
96 | |
97 # No such document was found | |
98 set urlmsg_errorgettingdoc "siitosvirhe" | |
99 | |
100 # URL was already known (was in database) | |
101 set urlmsg_alreadyknown "wanha!" | |
102 #set urlmsg_alreadyknown "Empiiristen havaintojen perusteella ja tällä sovellutusalueella esiintyneisiin aikaisempiin kontekstuaalisiin ilmaisuihin viitaten uskallan todeta, että sovellukseen ilmoittamasi tietoverkko-osoite oli kronologisti ajatellen varsin postpresentuaalisesti sopimaton ja ennestään hyvin tunnettu." | |
103 | |
104 # No match was found when searched with !urlfind or other command | |
105 set urlmsg_nomatch "Ei osumia." | |
106 | |
107 | |
108 ### | |
109 ### Things that you usually don't need to touch ... | |
110 ### | |
111 | |
112 # What IRC "command" should we use to send messages: | |
113 # (Valid alternatives are "PRIVMSG" and "NOTICE") | |
114 set urllog_preferredmsg "PRIVMSG" | |
115 | |
116 # The valid known Top Level Domains (TLDs), but not the country code TLDs | |
117 # (Now includes the new IANA published TLDs) | |
118 set urllog_tlds "org,com,net,mil,gov,biz,edu,coop,aero,info,museum,name,pro,int" | |
119 | |
120 | |
121 ########################################################################## | |
122 # No need to look below this line | |
123 ########################################################################## | |
124 #------------------------------------------------------------------------- | |
125 set urllog_name "URLLog" | |
18
1e2232135354
More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents:
13
diff
changeset
|
126 set urllog_version "2.00.0" |
0 | 127 |
128 set urllog_tlds [split $urllog_tlds ","] | |
129 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] | |
130 | |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
131 set urllog_html_ent [split "‏||—|-|‪||‬||‎||å|å|Å|Å|é|é|:|:|ä|ä|ö|ö|ä|ä|ö|ö| | |-|-|”|\"|“|\"|»|>>|"|\"|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>|ä|ä|ö|ö|Ä|Ä" "|"] |
0 | 132 |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
133 ### Require packages |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
134 package require sqlite3 |
0 | 135 package require http |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
136 |
0 | 137 ### Binding initializations |
138 if {$urllog_search != 0} { | |
139 bind pub - !urlfind urllog_pub_urlfind | |
140 bind msg - urlfind urllog_msg_urlfind | |
141 } | |
142 | |
143 bind pubm - *.* urllog_checkmsg | |
144 bind topc - *.* urllog_checkmsg | |
145 bind msg - paska urllog_checkmsg2 | |
146 | |
147 | |
148 ### Initialization messages | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
149 set urllog_message "$urllog_name v$urllog_version (C) 2000-2011 ccr/TNSP" |
0 | 150 putlog "$urllog_message" |
151 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
152 ### HTTP module initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
153 ::http::config -useragent "$urllog_name/$urllog_version" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
154 if {$http_proxy != 0} { |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
155 ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
156 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
157 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
158 if {$http_tls_support != 0} { |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
159 package require tls |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
160 ::http::register https 443 [list ::tls::socket -request 1 -require 1 -cadir "/etc/certs/"] |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
161 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
162 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
163 ### SQLite database initialization |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
164 if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} { |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
165 putlog " Could not open SQLite3 database '$urllog_db_file': $uerrmsg" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
166 exit 2 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
167 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
168 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
169 |
0 | 170 if {$http_proxy != 0} { |
171 putlog " (Using proxy $http_proxy_host:$http_proxy_port)" | |
172 } | |
173 | |
174 if {$urllog_check != 0} { | |
175 putlog " (Additional URL validity checks enabled)" | |
176 } | |
177 | |
178 if {$urllog_verbose != 0} { | |
179 putlog " (Verbose mode enabled)" | |
180 } | |
181 | |
182 if {$urllog_search != 0} { | |
183 putlog " (Search commands enabled)" | |
184 } | |
185 | |
186 #------------------------------------------------------------------------- | |
187 ### Utility functions | |
188 proc urllog_log {arg} { | |
189 global urllog_logmsg urllog_name | |
190 | |
191 if {$urllog_logmsg != 0} { | |
192 putlog "$urllog_name: $arg" | |
193 } | |
194 } | |
195 | |
196 | |
197 proc urllog_ctime { utime } { | |
198 | |
199 if {$utime == "" || $utime == "*"} { | |
200 set utime 0 | |
201 } | |
202 | |
203 return [clock format $utime -format "%d.%m.%Y %H:%M"] | |
204 } | |
205 | |
206 | |
207 proc urllog_isnumber {uarg} { | |
208 set ufoo 1 | |
209 | |
210 foreach i [split $uarg {}] { | |
211 if {![string match \[0-9\] $i]} {set ufoo 0} | |
212 } | |
213 | |
214 return $ufoo | |
215 } | |
216 | |
217 | |
218 proc urllog_msg {apublic anick achan amsg} { | |
219 global urllog_preferredmsg | |
220 | |
221 if {$apublic == 1} { | |
222 putserv "$urllog_preferredmsg $achan :$amsg" | |
223 } else { | |
224 putserv "$urllog_preferredmsg $anick :$amsg" | |
225 } | |
226 } | |
227 | |
228 | |
229 proc urllog_verb_msg {anick achan amsg} { | |
230 global urllog_verbose | |
231 | |
232 if {$urllog_verbose != 0} { | |
233 urllog_msg 1 $anick $achan $amsg | |
234 } | |
235 } | |
236 | |
237 | |
238 proc urllog_convert_ent {udata} { | |
239 global urllog_html_ent | |
240 regsub -all " " $udata " " utmp | |
241 regsub -all "\r" $udata " " utmp | |
242 regsub -all "\n" $utmp " " utmp | |
243 regsub -all " *" $utmp " " utmp | |
244 regsub -all "\t" $utmp "" utmp | |
245 return [string map -nocase $urllog_html_ent $utmp] | |
246 } | |
247 | |
248 | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
249 proc urllog_escape { str } { |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
250 return [string map {' ''} $str] |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
251 } |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
252 |
0 | 253 #------------------------------------------------------------------------- |
254 proc urllog_get_short {utime} { | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
255 global urllog_shorturl urllog_shorturl_prefix urllog_shorturl |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
256 |
0 | 257 set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" |
258 set ulen [string length $ustr] | |
259 | |
260 set u1 [expr $utime / ($ulen * $ulen)] | |
261 set utmp [expr $utime % ($ulen * $ulen)] | |
262 set u2 [expr $utmp / $ulen] | |
263 set u3 [expr $utmp % $ulen] | |
264 | |
265 return "\[ $urllog_shorturl_prefix[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3] \]" | |
266 } | |
267 | |
268 | |
269 #------------------------------------------------------------------------- | |
270 proc urllog_chop_url {url} { | |
271 global urllog_shorturl_orig | |
272 if {[string length $url] > $urllog_shorturl_orig} { | |
273 return "[string range $url 0 $urllog_shorturl_orig]..." | |
274 } else { | |
275 return $url | |
276 } | |
277 } | |
278 | |
279 #------------------------------------------------------------------------- | |
280 proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} { | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
281 global urldb urlmsg_alreadyknown urllog_shorturl |
0 | 282 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
283 ### Let's check if we already know the URL |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
284 set tmpURL [urllog_escape $urlStr] |
18
1e2232135354
More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents:
13
diff
changeset
|
285 urldb eval "SELECT id AS urlID, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls WHERE url='$tmpURL'" { |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
286 urllog_log "URL said by $urlNick ($urlStr) already known" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
287 if {$urllog_shorturl != 0} { |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
288 set qstr "[urllog_get_short $urlID] " |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
289 } else { |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
290 set qstr "" |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
291 } |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
292 append qstr "($uuser@[urllog_ctime $utime])" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
293 if {[string length $urlTitle] > 0} { |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
294 set qstr "$urlmsg_alreadyknown - '$urlTitle' $qstr" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
295 } else { |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
296 set qstr "$urlmsg_alreadyknown $qstr" |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
297 } |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
298 urllog_verb_msg $urlNick $urlChan $qstr |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
299 return 0 |
0 | 300 } |
301 | |
18
1e2232135354
More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents:
13
diff
changeset
|
302 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
303 ### OK, the URL was not already known - thus we add it |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
304 set sql "INSERT INTO urls (utime,url,user,host) VALUES ([unixtime], '[urllog_escape $urlStr]', '[urllog_escape $urlNick]', '[urllog_escape $urlHost]')" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
305 if {[catch {urldb eval $sql} uerrmsg]} { |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
306 urllog_log "$uerrmsg on SQL:\n$sql" |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
307 return 0 |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
308 } |
18
1e2232135354
More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents:
13
diff
changeset
|
309 set urlID [urldb last_insert_rowid] |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
310 urllog_log "Added URL ($urlNick@$urlChan): $urlStr" |
0 | 311 |
312 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
313 ### Let's say something, to confirm that everything went well. |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
314 if {$urllog_shorturl != 0} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
315 set qstr "[urllog_get_short $urlID] " |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
316 } else { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
317 set qstr "" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
318 } |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
319 if {[string length $urlTitle] > 0} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
320 urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
321 } else { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
322 urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
323 } |
0 | 324 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
325 return 1 |
0 | 326 } |
327 | |
328 | |
329 #------------------------------------------------------------------------- | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
330 proc urllog_http_handler {utoken utotal ucurr} { |
0 | 331 upvar #0 $utoken state |
332 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
333 # Stop fetching data after 3000 bytes, this should be enough to |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
334 # contain the head section of a HTML page. |
4
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
335 if {$ucurr > 64000} { |
0 | 336 set state(status) "ok" |
337 } | |
338 } | |
339 | |
340 #------------------------------------------------------------------------- | |
341 proc urllog_checkurl {urlStr urlNick urlHost urlChan} { | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
342 global urllog_tlds urllog_check urlmsg_nosuchhost urlmsg_ioerror |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
343 global urlmsg_timeout urlmsg_errorgettingdoc urllog_httprep |
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
344 global urllog_shorturl_prefix urllog_shorturl urllog_encoding |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
345 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
346 ### Print status to bot's log |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
347 urllog_log "$urlStr ($urlNick@$urlChan)" |
0 | 348 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
349 ### Try to determine the URL protocol component (if it is missing) |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
350 set u_checktld 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
351 if {[string match "*www.*" $urlStr] && ![string match "http://*" $urlStr] && ![string match "https://*" $urlStr]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
352 set urlStr "http://$urlStr" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
353 } elseif {[string match "*ftp.*" $urlStr] && ![string match "ftp://*" $urlStr]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
354 set urlStr "ftp://$urlStr" |
0 | 355 } |
356 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
357 if {[regexp "(ftp|http|https)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr u_match u_prefix ni1 ni2 ni3 ni4]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
358 # Check if the IP is on local network |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
359 if {($ni1 == 127) || ($ni1 == 10) || ($ni1 == 192 && $ni2 == 168) || ($ni1 == 0)} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
360 urllog_log "URL pointing to local or invalid network, ignored ($urlStr)." |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
361 return 0 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
362 } |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
363 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
364 # Skip TLD check for URLs with IP address |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
365 set u_checktld 0 |
0 | 366 } |
367 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
368 if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
369 urllog_log "Ignoring ShortURL." |
0 | 370 return 0 |
371 } | |
372 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
373 ### Check the PORT (if the ":" is there) |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
374 set u_record [split $urlStr "/"] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
375 set u_hostname [lindex $u_record 2] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
376 set u_port [lindex [split $u_hostname ":"] end] |
0 | 377 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
378 if {![urllog_isnumber $u_port] && $u_port != "" && $u_port != $u_hostname} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
379 urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $u_port" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
380 return 0 |
0 | 381 } |
382 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
383 # Default to port 80 (HTTP) |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
384 if {![urllog_isnumber $u_port]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
385 set u_port 80 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
386 } |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
387 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
388 ### Is it a http or ftp url? (FIX ME!) |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
389 if {[string range $urlStr 0 3] != "http" && [string range $urlStr 0 2] != "ftp"} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
390 urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED TYPE (not HTTP or FTP)" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
391 return 0 |
0 | 392 } |
393 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
394 ### Check the Top Level Domain (TLD) validity |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
395 if {$u_checktld != 0} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
396 set u_sane [lindex [split $u_hostname "."] end] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
397 set u_tld [lindex [split $u_sane ":"] 0] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
398 set u_found 0 |
0 | 399 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
400 if {[string length $u_tld] == 2} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
401 # Assume all 2-letter domains to be valid :) |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
402 set u_found 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
403 } else { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
404 # Check our list of known TLDs |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
405 foreach itld $urllog_tlds { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
406 if {[string match $itld $u_tld]} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
407 set u_found 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
408 } |
0 | 409 } |
410 } | |
411 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
412 if {$u_found == 0} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
413 urllog_log "Broken URL from $urlNick: ($urlStr) illegal TLD: $u_tld." |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
414 return 0 |
0 | 415 } |
416 } | |
417 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
418 set urlStr [string map $urllog_httprep $urlStr] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
419 |
0 | 420 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
421 ### Do we perform additional optional checks? |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
422 if {$urllog_check == 0 || [string range $urlStr 0 4] != "http:"} { |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
423 # No optional checks, just add the URL |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
424 urllog_addurl $urlStr $urlNick $urlHost $urlChan "" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
425 return 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
426 } |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
427 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
428 ### Does the document pointed by the URL exist? |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
429 if {[catch {set utoken [::http::geturl $urlStr -progress urllog_http_handler -blocksize 1024 -timeout 3000]} uerrmsg]} { |
0 | 430 urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)" |
431 urllog_log "HTTP request failed: $uerrmsg" | |
432 return 0 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
433 } |
0 | 434 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
435 if {[::http::status $utoken] == "timeout"} { |
0 | 436 urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout" |
437 urllog_log "HTTP request timed out ($urlStr)" | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
438 return 0 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
439 } |
0 | 440 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
441 if {[::http::status $utoken] != "ok"} { |
0 | 442 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::error $utoken])" |
443 urllog_log "Error in HTTP transaction: [::http::error $utoken] ($urlStr)" | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
444 return 0 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
445 } |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
446 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
447 # Fixme! Handle redirects! |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
448 set ucode [::http::ncode $utoken] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
449 if {$ucode >= 200 && $ucode <= 309} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
450 set udata [::http::data $utoken] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
451 set umatches [regexp -nocase -inline -- "<meta.\*\?content=\".\*\?charset=(\[^\"\]*)\"/>" $udata] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
452 set uconvert 0 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
453 if {[llength $umatches] > 0} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
454 set uencoding [lindex $umatches 1] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
455 if {[string length $uencoding] > 3} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
456 set uconvert 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
457 } |
0 | 458 } |
459 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
460 set umatches [regexp -nocase -inline -- "<title>(.\*\?)</title>" $udata] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
461 if {[llength $umatches] > 0} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
462 set urlTitle [lindex $umatches 1] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
463 if {$uconvert != 0} { |
8
7ceb3b5aeaa4
Catch possible error exceptions in character set conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
7
diff
changeset
|
464 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} { |
7ceb3b5aeaa4
Catch possible error exceptions in character set conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
7
diff
changeset
|
465 urllog_log "Error in charset conversion: $cerrmsg" |
7ceb3b5aeaa4
Catch possible error exceptions in character set conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
7
diff
changeset
|
466 } |
0 | 467 } |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
468 set urlTitle [urllog_convert_ent $urlTitle] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
469 regsub -all "(^ *| *$)" $urlTitle "" urlTitle |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
470 } else { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
471 set urlTitle "" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
472 } |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
473 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
474 # Rasiatube hack |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
475 if {[string match "*/rasiatube/view*" $urlStr]} { |
4
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
476 set rasia 0 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
477 set umatches [regexp -nocase -inline -- "<link rel=\"video_src\"\.\*\?file=(http://\[^&\]+)&" $udata] |
0 | 478 if {[llength $umatches] > 0} { |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
479 set urlStr [lindex $umatches 1] |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
480 regsub -all "\/v\/" $urlStr "\/watch\?v=" urlStr |
4
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
481 set rasia 1 |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
482 } else { |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
483 set umatches [regexp -nocase -inline -- "SWFObject.\"(\[^\"\]+)\", *\"flashvideo" $udata] |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
484 if {[llength $umatches] > 0} { |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
485 set urlStr [lindex $umatches 1] |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
486 regsub "http:\/\/www.dailymotion.com\/swf\/" $urlStr "http:\/\/www.dailymotion.com\/video\/" urlStr |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
487 set rasia 1 |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
488 } |
0 | 489 } |
4
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
490 |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
491 if {$rasia != 0} { |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
492 urllog_log "RasiaTube mangler: $urlStr" |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
493 urllog_verb_msg $urlNick $urlChan "Korjataan haiseva rasiatube-linkki: $urlStr" |
8c9049f2b2b0
Improve RasiaTube de-mangler.
Matti Hamalainen <ccr@tnsp.org>
parents:
3
diff
changeset
|
494 } |
0 | 495 } |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
496 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
497 urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
498 return 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
499 } else { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
500 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::code $utoken])" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
501 urllog_log "[::http::code $utoken] - $urlStr" |
0 | 502 } |
503 | |
504 ::http::cleanup $utoken | |
505 } | |
506 | |
507 | |
508 #------------------------------------------------------------------------- | |
509 proc urllog_checkmsg {nick uhost hand chan text} { | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
510 ### Check the nick |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
511 if {$nick == "*"} { |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
512 urllog_log "urllog_checkmsg: nick was wc, this should not happen." |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
513 return 0 |
0 | 514 } |
515 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
516 ### Do the URL checking |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
517 foreach istr [split $text " "] { |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
518 if {[regexp "(ftp|http|https)://|www\..+|ftp\..*" $istr]} { |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
519 urllog_checkurl $istr $nick $uhost $chan |
0 | 520 } |
521 } | |
522 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
523 return 0 |
0 | 524 } |
525 | |
526 | |
527 #------------------------------------------------------------------------- | |
528 ### Parse arguments, find and show the results | |
529 proc urllog_find {unick uhand uchan utext upublic} { | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
530 global urllog_name urllog_version urllog_shorturl urldb |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
531 global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch |
0 | 532 |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
533 if {$upublic == 0} { |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
534 set ulimit 5 |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
535 } else { |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
536 set ulimit 3 |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
537 } |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
538 |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
539 ### Parse the given command |
0 | 540 urllog_log "$unick/$uhand searched URL: $utext" |
541 | |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
542 set ftokens [split $utext " "] |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
543 foreach ftoken $ftokens { |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
544 set fprefix [string range $ftoken 0 0] |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
545 set fpattern [string range $ftoken 1 end] |
0 | 546 |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
547 if {$fprefix == "-"} { |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
548 lappend fpatlist "url NOT LIKE '%[urllog_escape $fpattern]%'" |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
549 } elseif {$fprefix == "%"} { |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
550 lappend fpatlist "user='[urllog_escape $fpattern]'" |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
551 } elseif {$fprefix == "@"} { |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
552 # foo |
0 | 553 } else { |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
554 lappend fpatlist "url LIKE '%[urllog_escape $ftoken]%'" |
0 | 555 } |
556 } | |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
557 |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
558 set iresults 0 |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
559 set query "SELECT id AS urlID, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls WHERE [join $fpatlist " AND "] ORDER BY utime DESC LIMIT $ulimit" |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
560 urldb eval $query { |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
561 incr iresults |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
562 set shortURL $uurl |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
563 if {$urllog_shorturl != 0 && $urlID != ""} { |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
564 set shortURL "$shortURL [urllog_get_short $urlID]" |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
565 } |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
566 urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[urllog_ctime $utime])" |
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
567 } |
0 | 568 |
19
9cf22053e5da
Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
18
diff
changeset
|
569 if {$iresults == 0} { |
0 | 570 # If no URLs were found |
571 urllog_msg $upublic $unick $uchan $urlmsg_nomatch | |
572 } | |
573 | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
574 return 0 |
0 | 575 } |
576 | |
577 | |
578 #------------------------------------------------------------------------- | |
579 ### Finding binded functions | |
580 proc urllog_pub_urlfind {unick uhost uhand uchan utext} { | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
581 urllog_find $unick $uhand $uchan $utext 1 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
582 return 0 |
0 | 583 } |
584 | |
585 | |
586 proc urllog_msg_urlfind {unick uhost uhand utext} { | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
587 urllog_find $unick $uhand "" $utext 0 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
588 return 0 |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
589 } |
0 | 590 |
591 | |
592 #------------------------------------------------------------------------- | |
593 proc urllog_checkmsg2 {unick uhost uhand utext} { | |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
594 urllog_checkurl $utext $unick $uhost "#CHANNEL" |
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
595 return 0 |
0 | 596 } |
597 | |
598 | |
599 # end of script |