Mercurial > hg > egg-tcls
comparison urllog.tcl @ 0:1c4e2814cd41
Initial import.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 21 Sep 2010 13:12:49 +0300 |
parents | |
children | 8003090caa35 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1c4e2814cd41 |
---|---|
1 ########################################################################## | |
2 # | |
3 # URLLog v1.99.12 by ccr/TNSP <ccr@tnsp.org> | |
4 # (C) Copyright 2000-2010 Tecnic Software productions (TNSP) | |
5 # | |
6 ########################################################################## | |
7 # | |
8 # NOTICE! If you are upgrading to v1.90+ from any older version, you | |
9 # might want to run a conversion script against your URL-database file. | |
10 # | |
11 # It is NOT strictly necessary, but recommended especially if you | |
12 # utilize the "shorturl" functionality. The conversion script is | |
13 # available at < http://tnsp.org/egg-tcls/ > | |
14 # | |
15 ########################################################################## | |
16 ### | |
17 ### HTTP options | |
18 ### | |
19 # Set to 1 if you want to use proxy | |
20 set http_proxy 0 | |
21 | |
22 # Proxy host and port number (only used if enabled above) | |
23 set http_proxy_host "" | |
24 set http_proxy_port 8080 | |
25 | |
26 | |
27 ### | |
28 ### General options | |
29 ### | |
30 | |
31 # Filename where the logged URL data goes | |
32 set urllog_file "data.urllog" | |
33 | |
34 | |
35 # 1 = Verbose: Say messages when URL is OK, bad, etc. | |
36 # 0 = Quiet : Be quiet (only speak if asked with !urlfind, etc) | |
37 set urllog_verbose 1 | |
38 | |
39 | |
40 # 1 = Put some info to bot's Logfile during operation | |
41 # 0 = Don't. | |
42 set urllog_logmsg 1 | |
43 | |
44 | |
45 # 1 = Check URLs for validity and existence before adding. | |
46 # 0 = No checks. Add _anything_ that looks like an URL to the database. | |
47 set urllog_check 1 | |
48 | |
49 | |
50 ### | |
51 ### WWW page creation options | |
52 ### | |
53 # 1 = Create the www pages, 0 = Don't. | |
54 set urllog_makewww 0 | |
55 | |
56 # TIP: If you don't want the bot to create the HTML-file, you can | |
57 # use a simple Perl/Ruby/Python/PHP/whatnot-scripted page to do that. | |
58 | |
59 # Filename AND FULL PATH of the html-file where the www-format log goes | |
60 # (Remember to set the permissions right after the file has been created) | |
61 set urllog_webfile "/home/niinuska/public_html/urllog.html" | |
62 | |
63 | |
64 ### | |
65 ### Search related settings | |
66 ### | |
67 | |
68 # 0 = No search-commands available | |
69 # 1 = Search enabled | |
70 set urllog_search 1 | |
71 | |
72 | |
73 # How many URL's should the !urlfind command show (maximum limit) | |
74 set urllog_showmax_pub 3 | |
75 | |
76 | |
77 # For private-search, this is the default limit (user can change it) | |
78 set urllog_showmax_priv 6 | |
79 | |
80 | |
81 ### | |
82 ### ShortURL-settings | |
83 ### | |
84 | |
85 # 1 = Use ShortURLs | |
86 # 0 = Don't. | |
87 set urllog_shorturl 1 | |
88 | |
89 # Max length of original URL to be shown | |
90 set urllog_shorturl_orig 30 | |
91 | |
92 # Path to PHP/CGI-script that redirects ShortURLs | |
93 set urllog_shorturl_prefix "http://tnsp.org/u/" | |
94 | |
95 | |
96 ### | |
97 ### Message-texts | |
98 ### | |
99 | |
100 # No such host was found | |
101 set urlmsg_nosuchhost "ei tommosta oo!" | |
102 | |
103 # Could not connect host (I/O errors etc) | |
104 set urlmsg_ioerror "kraak, virhe yhdynnässä." | |
105 | |
106 # HTTP timeout | |
107 set urlmsg_timeout "ei jaksa ootella" | |
108 | |
109 # No such document was found | |
110 set urlmsg_errorgettingdoc "siitosvirhe" | |
111 | |
112 # URL was already known (was in database) | |
113 set urlmsg_alreadyknown "wanha!" | |
114 #set urlmsg_alreadyknown "Empiiristen havaintojen perusteella ja tällä sovellutusalueella esiintyneisiin aikaisempiin kontekstuaalisiin ilmaisuihin viitaten uskallan todeta, että sovellukseen ilmoittamasi tietoverkko-osoite oli kronologisti ajatellen varsin postpresentuaalisesti sopimaton ja ennestään hyvin tunnettu." | |
115 | |
116 # No match was found when searched with !urlfind or other command | |
117 set urlmsg_nomatch "Ei osumia." | |
118 | |
119 | |
120 ### | |
121 ### Things that you usually don't need to touch ... | |
122 ### | |
123 | |
124 # What IRC "command" should we use to send messages: | |
125 # (Valid alternatives are "PRIVMSG" and "NOTICE") | |
126 set urllog_preferredmsg "PRIVMSG" | |
127 | |
128 # The valid known Top Level Domains (TLDs), but not the country code TLDs | |
129 # (Now includes the new IANA published TLDs) | |
130 set urllog_tlds "org,com,net,mil,gov,biz,edu,coop,aero,info,museum,name,pro,int" | |
131 | |
132 | |
133 ########################################################################## | |
134 # No need to look below this line | |
135 ########################################################################## | |
136 #------------------------------------------------------------------------- | |
137 set urllog_name "URLLog" | |
138 set urllog_version "1.99.12" | |
139 | |
140 set urllog_tlds [split $urllog_tlds ","] | |
141 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] | |
142 | |
143 set urllog_html_ent [split "‪||‬||‎||å|å|Å|Å|é|é|:|:|ä|ä|ö|ö|ä|ä|ö|ö| | |-|-|”|\"|“|\"|»|>>|"|\"|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>|ä|ä|ö|ö|Ä|Ä" "|"] | |
144 | |
145 | |
### HTTP module initialization
package require http
::http::config -useragent "$urllog_name/$urllog_version"
if {$http_proxy != 0} {
    ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
}


### Binding initializations
# Search commands: public "!urlfind" and private "/msg <bot> urlfind"
if {$urllog_search != 0} {
    bind pub - !urlfind urllog_pub_urlfind
    bind msg - urlfind urllog_msg_urlfind
}

# Watch all channel messages and topic changes for URLs
bind pubm - *.* urllog_checkmsg
bind topc - *.* urllog_checkmsg
# Private test hook: "/msg <bot> paska <url>" feeds a URL in directly
bind msg - paska urllog_checkmsg2

if {$urllog_makewww != 0} {
    # DCC ".dowebfile" for masters, plus a timer that fires on every
    # minute ending in 0 (i.e. every ten minutes)
    bind dcc m dowebfile urllog_dowebfile
    bind time - "*0 % % % %" urllog_timer
}


### Initialization messages
set urllog_message "$urllog_name v$urllog_version (C) 2000-2010 ccr/TNSP"
putlog "$urllog_message"

if {$http_proxy != 0} {
    putlog " (Using proxy $http_proxy_host:$http_proxy_port)"
}

if {$urllog_check != 0} {
    putlog " (Additional URL validity checks enabled)"
}

if {$urllog_verbose != 0} {
    putlog " (Verbose mode enabled)"
}

if {$urllog_makewww != 0} {
    putlog " (Creating WWW page to $urllog_webfile)"
}

if {$urllog_search != 0} {
    putlog " (Search commands enabled)"
}
193 | |
194 | |
#-------------------------------------------------------------------------
### Utility functions
proc urllog_log {arg} {
    # Write a message to the bot's log, prefixed with the script name.
    # Suppressed entirely when urllog_logmsg is 0.
    global urllog_logmsg urllog_name

    if {$urllog_logmsg == 0} {
        return
    }
    putlog "$urllog_name: $arg"
}
204 | |
205 | |
proc urllog_ctime { utime } {
    # Render a UNIX timestamp as "DD.MM.YYYY HH:MM" (local time).
    # Empty or "*" values (broken/legacy database fields) are treated
    # as the epoch (0).
    switch -- $utime {
        "" - "*" { set stamp 0 }
        default  { set stamp $utime }
    }

    return [clock format $stamp -format "%d.%m.%Y %H:%M"]
}
214 | |
215 | |
proc urllog_dowebfile {hand idx text} {
    # DCC command ".dowebfile": force regeneration of the HTML page,
    # but only when WWW page creation is enabled in the configuration.
    global urllog_name urllog_makewww

    if {$urllog_makewww != 1} {
        urllog_log "Webfile disabled."
        return
    }

    urllog_log "Making webfiles..."
    urllog_make_web_file
}
226 | |
227 | |
proc urllog_isnumber {uarg} {
    # Return 1 if the argument is a non-empty string of ASCII digits,
    # 0 otherwise ("", "12a", "-5", "1 2" are all non-numeric).
    #
    # FIX: the previous char-by-char loop never executed for the empty
    # string and thus reported "" as numeric, which also prevented the
    # port-number default from being applied by the caller.
    return [regexp {^[0-9]+$} $uarg]
}
237 | |
238 | |
proc urllog_msg {apublic anick achan amsg} {
    # Deliver a message via the configured IRC command: to the channel
    # when apublic == 1, otherwise privately to the nick.
    global urllog_preferredmsg

    set target [expr {$apublic == 1 ? $achan : $anick}]
    putserv "$urllog_preferredmsg $target :$amsg"
}
248 | |
249 | |
proc urllog_verb_msg {anick achan amsg} {
    # Send a public message, but only when verbose mode is enabled.
    global urllog_verbose

    if {$urllog_verbose == 0} {
        return
    }
    urllog_msg 1 $anick $achan $amsg
}
257 | |
258 | |
proc urllog_convert_ent {udata} {
    # Normalize whitespace in a page <title> string and translate HTML
    # entities via the urllog_html_ent map (case-insensitively).
    global urllog_html_ent

    # FIX: the original piped the first regsub into utmp but the second
    # regsub re-read $udata, silently discarding that result; the
    # substitutions are now chained through utmp.
    regsub -all " " $udata " " utmp
    regsub -all "\r" $utmp " " utmp
    regsub -all "\n" $utmp " " utmp
    # Collapse runs of spaces. FIX: the original pattern " *" also
    # matched the empty string, inserting a space between every
    # character; " +" collapses without inserting.
    regsub -all " +" $utmp " " utmp
    regsub -all "\t" $utmp "" utmp
    return [string map -nocase $urllog_html_ent $utmp]
}
268 | |
269 | |
#-------------------------------------------------------------------------
proc urllog_make_web_file {} {
    # Dump the whole URL database as a simple HTML page into
    # urllog_webfile. Invoked by the ten-minute timer and the DCC
    # ".dowebfile" command.
    global urllog_file urllog_webfile urllog_message botnick

    urllog_log "Creating HTML-file for WWW..."

    set outfd [open $urllog_webfile w]
    set dbfd [open $urllog_file r]

    # Page header
    puts $outfd "<html><head><title>Caught URLs</title></head>"
    puts $outfd "<body bgcolor=\"#FFFFFF\" text=\"#0020a0\" link=\"#0020a0\" vlink=\"#0020a0\" alink=\"#0020a0\">"
    puts $outfd "<font face=\"Helvetica, Arial\">"
    puts $outfd "<center><font size=\"6\">URLs caught by $botnick</center><hr>"
    puts $outfd "<font size=\"3\">"

    # One entry per database record: link, timestamp, nick
    while {![eof $dbfd]} {
        gets $dbfd dbline
        if {$dbline == ""} {
            continue
        }
        # Strip characters that would break the generated markup
        regsub -all "<|>|\"" $dbline "" dbline
        set fields [split $dbline " "]
        puts $outfd "<a href=\"[lindex $fields 0]\">[lindex $fields 0]</a><br>Added on <B>[urllog_ctime [lindex $fields 1]]</B> by <B>[lindex $fields 2]</B><br><hr>"
    }

    # Page footer
    puts $outfd "<center>Generated by $urllog_message<BR>"
    puts $outfd "(Last updated <B>[urllog_ctime [unixtime]]</B>)</center>"
    puts $outfd "</body></html>"

    close $outfd
    close $dbfd

    urllog_log "HTML-file generated OK."
}
310 | |
311 | |
#-------------------------------------------------------------------------
proc urllog_timer {umin uhour uday umonth uyear} {
    # Eggdrop "time" binding callback (bound to minutes ending in 0,
    # i.e. every ten minutes): regenerate the WWW page.
    urllog_make_web_file
}
316 | |
317 | |
#-------------------------------------------------------------------------
proc urllog_get_short {utime} {
    # Encode a URL database ID as a three-character base-62 code and
    # wrap it with the configured ShortURL redirector prefix, e.g.
    # "[ http://tnsp.org/u/ABC ]". Supports IDs 0 .. 62^3-1; larger
    # values produce truncated codes.
    global urllog_shorturl_prefix

    # NOTE(review): "KLNM" is deliberately left non-alphabetical -- the
    # external redirect script must decode with this exact alphabet, so
    # do not "fix" the ordering.
    set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
    set ulen [string length $ustr]

    # Braced expr bodies (FIX: unbraced expr re-substitutes arguments
    # and is both slower and unsafe for unexpected input).
    set u1 [expr {$utime / ($ulen * $ulen)}]
    set utmp [expr {$utime % ($ulen * $ulen)}]
    set u2 [expr {$utmp / $ulen}]
    set u3 [expr {$utmp % $ulen}]

    return "\[ $urllog_shorturl_prefix[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3] \]"
}
331 | |
332 | |
#-------------------------------------------------------------------------
proc urllog_chop_url {url} {
    # Shorten overlong URLs for display: anything longer than
    # urllog_shorturl_orig characters is truncated and suffixed "...".
    global urllog_shorturl_orig

    if {[string length $url] <= $urllog_shorturl_orig} {
        return $url
    }
    return "[string range $url 0 $urllog_shorturl_orig]..."
}
342 | |
#-------------------------------------------------------------------------
proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} {
    # Append a URL record to the database file, unless the exact same
    # URL is already stored -- in that case announce the earlier
    # sighting instead. Record format: "url timestamp nick (host) id".
    # Returns 1 if the URL was added, 0 if it was already known.
    global urlmsg_alreadyknown urllog_file urllog_shorturl

    ### Let's check if we already know the URL
    # Touch the file in append mode first so a missing database file is
    # created before we open it for reading.
    set fd [open $urllog_file a+]
    close $fd

    set fd [open $urllog_file r]
    set urlID -1
    while {![eof $fd]} {
        set qitems [split [gets $fd] " "]
        set qindex [lindex $qitems 4]
        # Track the highest ID seen; a new record gets max+1
        if {$qindex != "" && $qindex > $urlID} {
            set urlID $qindex
        }
        if {[lindex $qitems 0] == $urlStr} {
            urllog_log "URL said by $urlNick ($urlStr) already known"
            if {$urllog_shorturl != 0} {
                set qstr "[urllog_get_short $qindex] "
            } else {
                set qstr ""
            }
            append qstr "([lindex $qitems 2]@[urllog_ctime [lindex $qitems 1]])"
            if {[string length $urlTitle] > 0} {
                set qstr "$urlmsg_alreadyknown - '$urlTitle' $qstr"
            } else {
                set qstr "$urlmsg_alreadyknown $qstr"
            }
            urllog_verb_msg $urlNick $urlChan $qstr
            # FIX: close the read handle before the early return -- the
            # original leaked a file descriptor on every duplicate URL.
            close $fd
            return 0
        }
    }
    close $fd


    ### OK, the URL was not already known - thus we add it
    incr urlID
    set urlTime [unixtime]
    set fd [open $urllog_file a+]
    puts $fd "$urlStr $urlTime $urlNick ($urlHost) $urlID"
    close $fd
    urllog_log "Added URL ($urlNick@$urlChan): $urlStr"


    ### Let's say something, to confirm that everything went well.
    if {$urllog_shorturl != 0} {
        set qstr "[urllog_get_short $urlID] "
    } else {
        set qstr ""
    }
    if {[string length $urlTitle] > 0} {
        urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr"
    } else {
        urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr"
    }

    return 1
}
402 | |
403 | |
#-------------------------------------------------------------------------
proc urllog_http_handler {utoken utotal ucurrent} {
    # ::http::geturl -progress callback: once roughly 3000 bytes have
    # arrived (typically enough to contain the <head> section), force
    # the transaction status to "ok" so the fetch stops early.
    upvar #0 $utoken state

    set received $state(currentsize)
    if {$received < 3000} {
        return
    }
    set state(status) "ok"
}
414 | |
#-------------------------------------------------------------------------
proc urllog_checkurl {urlStr urlNick urlHost urlChan} {
    # Validate a candidate URL (scheme, port, TLD and -- when
    # urllog_check is on -- existence via a partial HTTP fetch), then
    # hand it to urllog_addurl together with any extracted page title.
    # Returns 0 when the URL is rejected.
    # NOTE(review): urlmsg_nosuchhost, urllog_html and urllog_encoding
    # are imported below but not referenced in this proc.
    global botnick urllog_html urllog_tlds urllog_check urllog_file
    global urlmsg_nosuchhost urlmsg_ioerror urlmsg_timeout urlmsg_errorgettingdoc
    global urllog_httprep urllog_shorturl_prefix urllog_shorturl urllog_encoding

    ### Some status
    urllog_log "$urlStr ($urlNick@$urlChan)"

    ### Try to determine the URL protocol component (if it is missing)
    set u_checktld 1

    # "www.*" with no scheme is assumed to be HTTP ...
    if {[string match "*www.*" $urlStr] && ![string match "http://*" $urlStr] && ![string match "https://*" $urlStr]} {
        set urlStr "http://$urlStr"
    }

    # ... and "ftp.*" with no scheme is assumed to be FTP
    if {[string match "*ftp.*" $urlStr] && ![string match "ftp://*" $urlStr]} {
        set urlStr "ftp://$urlStr"
    }

    if {[regexp "(ftp|http|https)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr u_match u_prefix ni1 ni2 ni3 ni4]} {
        # Check if the IP is on local network
        if {($ni1 == 127) || ($ni1 == 10) || ($ni1 == 192 && $ni2 == 168) || ($ni1 == 0)} {
            urllog_log "URL pointing to local or invalid network, ignored ($urlStr)."
            return 0
        }

        # Skip TLD check for URLs with IP address
        set u_checktld 0
    }

    # Never re-log our own ShortURL redirector links
    if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} {
        urllog_log "Ignoring ShortURL."
        return 0
    }

    ### Check the PORT (if the ":" is there)
    set u_record [split $urlStr "/"]
    set u_hostname [lindex $u_record 2]
    set u_port [lindex [split $u_hostname ":"] end]

    # When the hostname has no ":", u_port equals the whole hostname,
    # which the third condition below deliberately lets through
    if {![urllog_isnumber $u_port] && $u_port != "" && $u_port != $u_hostname} {
        urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $u_port"
        return 0
    }

    # Default to port 80 (HTTP)
    # NOTE(review): u_port is not used after this point in this proc.
    if {![urllog_isnumber $u_port]} {
        set u_port 80
    }

    ### Is it a http or ftp url? (FIX ME!)
    if {[string range $urlStr 0 3] != "http" && [string range $urlStr 0 2] != "ftp"} {
        urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED TYPE (not HTTP or FTP)"
        return 0
    }

    ### Check the Top Level Domain (TLD) validity
    if {$u_checktld != 0} {
        set u_sane [lindex [split $u_hostname "."] end]
        set u_tld [lindex [split $u_sane ":"] 0]
        set u_found 0

        if {[string length $u_tld] == 2} {
            # Assume all 2-letter domains to be valid :)
            set u_found 1
        } else {
            # Check our list of known TLDs
            foreach itld $urllog_tlds {
                if {[string match $itld $u_tld]} {
                    set u_found 1
                }
            }
        }

        if {$u_found == 0} {
            urllog_log "Broken URL from $urlNick: ($urlStr) illegal TLD: $u_tld."
            return 0
        }
    }

    # Escape characters that would upset the HTTP module
    set urlStr [string map $urllog_httprep $urlStr]

    ### Do we perform additional optional checks?
    if {$urllog_check != 0 && [string range $urlStr 0 6] == "http://"} {
        # Does the document pointed by the URL exist? The progress
        # callback aborts the transfer after ~3000 received bytes.
        if {[catch {set utoken [::http::geturl $urlStr -progress urllog_http_handler -blocksize 1024 -timeout 4000]} uerrmsg]} {
            urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)"
            urllog_log "HTTP request failed: $uerrmsg"
            return 0
        }

        upvar #0 $utoken ustate

        # Timeouts and transaction errors are reported but fall
        # through to the cleanup below without adding the URL
        if {$ustate(status) == "timeout"} {
            urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout"
            urllog_log "HTTP request timed out ($urlStr)"
        }

        if {$ustate(status) == "error"} {
            urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::error $utoken])"
            urllog_log "Error in HTTP transaction: [::http::error $utoken] ($urlStr)"
        }

        # FIXME! Handle redirects!
        if {$ustate(status) == "ok"} {
            if {[::http::ncode $utoken] >= 200 && [::http::ncode $utoken] <= 309} {
                set udata $ustate(body)
                # Try to pick the document charset from a meta tag
                set umatches [regexp -nocase -inline -- "<meta.\*\?content=\".\*\?charset=(\[^\"\]*)\"/>" $udata]
                set uconvert 0
                if {[llength $umatches] > 0} {
                    set uencoding [lindex $umatches 1]
                    if {[string length $uencoding] > 3} {
                        set uconvert 1
                    }
                }

                # Extract the page title, recode and clean it up
                set umatches [regexp -nocase -inline -- "<title>(.\*\?)</title>" $udata]
                if {[llength $umatches] > 0} {
                    set urlTitle [lindex $umatches 1]
                    if {$uconvert != 0} {
                        set urlTitle [encoding convertfrom $uencoding $urlTitle]
                    }
                    set urlTitle [urllog_convert_ent $urlTitle]
                    # Strip leading and trailing spaces
                    regsub -all "(^ *| *$)" $urlTitle "" urlTitle
                } else {
                    set urlTitle ""
                }
                urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle
            } else {
                urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::code $utoken])"
                urllog_log "[::http::code $utoken] - $urlStr"
            }
        }

        ::http::cleanup $utoken
    } else {
        # No optional checks, just add the URL
        urllog_addurl $urlStr $urlNick $urlHost $urlChan ""
    }
}
556 | |
557 | |
#-------------------------------------------------------------------------
proc urllog_checkmsg {nick uhost hand chan text} {
    # pubm/topc binding callback: scan a channel message or topic word
    # by word and feed anything URL-like to urllog_checkurl.

    # A wildcard nick would indicate a malformed event
    if {$nick == "*"} {
        urllog_log "urllog_checkmsg: nick was wc, this should not happen."
        return 0
    }

    ### Do the URL checking
    foreach word [split $text " "] {
        set isurl 0
        if {[string match "*http://*" $word]} { set isurl 1 }
        if {[string match "*ftp://*" $word]} { set isurl 1 }
        if {[string match "*www.*" $word]} { set isurl 1 }
        if {[string match "*ftp.*" $word]} { set isurl 1 }
        if {[regexp "(ftp|http|https)://\[0-9\]\{1,3\}\\.\[0-9\]\{1,3\}\\.\[0-9\]\{1,3\}\\.\[0-9\]\{1,3\}" $word]} { set isurl 1 }

        if {$isurl} {
            urllog_checkurl $word $nick $uhost $chan
        }
    }

    return 0
}
577 | |
578 | |
#-------------------------------------------------------------------------
### Find from database according to patterns, up to imax results.

proc urllog_urlfind {ipatterns imax} {
    # Search the URL database for records matching ALL given rules.
    # Each rule is "<token> <glob>": "+" requires the URL to match,
    # "-" forbids a URL match, "%" requires the nick to match.
    # Matching is case-insensitive; returns at most the imax newest
    # matching raw record lines.
    global urllog_file

    set matches {}
    set nmatches 0
    set fd [open $urllog_file r]

    while {![eof $fd]} {
        # One record per line: "url timestamp nick (host) id"
        gets $fd rawline
        set fields [split [string tolower $rawline] " "]
        set curl [lindex $fields 0]
        set cnick [lindex $fields 2]

        # All rules must pass for the record to qualify
        set ok 1
        foreach rule $ipatterns {
            set rparts [split [string tolower $rule] " "]
            set rtoken [lindex $rparts 0]
            set rglob [lindex $rparts 1]
            set rhit [string match $rglob $curl]

            if {$rtoken == "+" && $rhit == 0} { set ok 0 }

            if {$rtoken == "-" && $rhit == 1} { set ok 0 }

            if {$rtoken == "%" && [string match $rglob $cnick] == 0} { set ok 0 }
        }

        if {$ok == 1 && $rawline != ""} {
            incr nmatches
            lappend matches $rawline
        }
    }

    close $fd

    # Keep only the imax newest entries (records are appended in order;
    # lrange clamps a negative start index to 0)
    return [lrange $matches [expr {$nmatches - $imax}] $nmatches]
}
628 | |
629 | |
#-------------------------------------------------------------------------
### Parse arguments, find and show the results
proc urllog_find {unick uhand uchan utext upublic} {
    # Parse a search command, query the database and report results
    # either publicly (upublic == 1) or via private message.
    global botnick urllog_name urllog_version urllog_shorturl
    global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch

    ### Parse the given command
    urllog_log "$unick/$uhand searched URL: $utext"

    # Build the rule list for urllog_urlfind: a leading "-", "+", "%"
    # or "@" selects the rule type, bare words become required ("+")
    # URL substring patterns.
    # FIX: initialize the list so an empty search string no longer
    # raises a "can't read ipatlist" error.
    set ipatlist {}
    foreach ftoken [split $utext " "] {
        set foomark [string range $ftoken 0 0]
        set foopat [string range $ftoken 1 end]

        if {$foomark == "-" || $foomark == "+" || $foomark == "%" || $foomark == "@"} {
            lappend ipatlist "$foomark *$foopat*"
        } else {
            lappend ipatlist "+ *$ftoken*"
        }
    }

    ### Get the matches from database
    if {$upublic == 0} {
        set iresults [urllog_urlfind $ipatlist $urllog_showmax_priv]
    } else {
        set iresults [urllog_urlfind $ipatlist $urllog_showmax_pub]
    }

    ### Show the results
    if {$iresults != ""} {
        set j 0
        foreach i $iresults {
            incr j
            set foo [split $i " "]
            set shortURL [lindex $foo 0]
            set shortID [lindex $foo 4]

            # Append the ShortURL form when enabled and an ID exists
            if {$urllog_shorturl != 0 && $shortID != ""} {
                set shortURL "$shortURL [urllog_get_short $shortID]"
            }

            urllog_msg $upublic $unick $uchan "#$j: $shortURL ([lindex $foo 2]@[urllog_ctime [lindex $foo 1]])"
        }

    } else {
        # If no URLs were found
        urllog_msg $upublic $unick $uchan $urlmsg_nomatch
    }

    return 0
}
682 | |
683 | |
#-------------------------------------------------------------------------
### Finding binded functions
proc urllog_pub_urlfind {unick uhost uhand uchan utext} {
    # Public "!urlfind" binding: run the search in public mode.

    urllog_find $unick $uhand $uchan $utext 1

    return 0
}
692 | |
693 | |
proc urllog_msg_urlfind {unick uhost uhand utext} {
    # Private "/msg urlfind" binding: run the search in private mode
    # (no channel, replies go to the nick).

    urllog_find $unick $uhand "" $utext 0

    return 0
}
700 | |
#-------------------------------------------------------------------------
proc urllog_checkmsg2 {unick uhost uhand utext} {
    # Private "paska" test binding: feed a URL straight into the
    # checker. NOTE(review): the channel is the hard-coded placeholder
    # "#CHANNEL", so any verbose replies go to that literal name.

    urllog_checkurl $utext $unick $uhost "#CHANNEL"

    return 0
}
708 | |
709 | |
710 | |
711 # end of script |