changeset 13:e06d41fb69d5

Begin work on converting urllog.tcl to use an SQLite3 database instead of a flat file.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 05 Sep 2011 16:18:11 +0300
parents 4992e3daba55
children 929768c2aa86
files urllog.tcl
diffstat 1 files changed, 53 insertions(+), 101 deletions(-)
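Note: the new code below selects from and inserts into an "urls" table, but this changeset does not create it. A minimal schema consistent with the columns referenced in the diff (id, utime, url, user, host) might look like the following sketch; the CREATE TABLE statement is an assumption for illustration only and is not part of this diff:

	# Hypothetical schema, assumed for illustration; not created by this changeset.
	# "urldb" is the database command opened via [sqlite3 urldb $urllog_db_file].
	urldb eval {
		CREATE TABLE IF NOT EXISTS urls (
			id INTEGER PRIMARY KEY,
			utime INTEGER,
			url TEXT,
			user TEXT,
			host TEXT
		)
	}
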
--- a/urllog.tcl	Mon Sep 05 16:16:40 2011 +0300
+++ b/urllog.tcl	Mon Sep 05 16:18:11 2011 +0300
@@ -1,6 +1,6 @@
 ##########################################################################
 #
-# URLLog v1.99.13 by ccr/TNSP <ccr@tnsp.org>
+# URLLog v2.00.0 by ccr/TNSP <ccr@tnsp.org>
 # (C) Copyright 2000-2011 Tecnic Software productions (TNSP)
 #
 ##########################################################################
@@ -13,6 +13,7 @@
 # available at < http://tnsp.org/egg-tcls/ >
 #
 ##########################################################################
+
 ###
 ### HTTP options
 ###
@@ -30,7 +31,7 @@
 ###
 
 # Filename where the logged URL data goes
-set urllog_file "data.urllog"
+set urllog_db_file "urllog.sqlite"
 
 
 # 1 = Verbose: Say messages when URL is OK, bad, etc.
@@ -129,21 +130,10 @@
 
 set urllog_html_ent [split "&rlm;||&#8212;|-|&#x202a;||&#x202c;||&lrm;||&aring;|å|&Aring;|Å|&eacute;|é|&#58;|:|&#xe4;|ä|&#xf6;|ö|&#228;|ä|&#246;|ö|&nbsp;| |&#45;|-|&#8221;|\"|&#8220;|\"|&raquo;|>>|&quot;|\"|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>|ä|ä|ö|ö|Ä|Ä" "|"]
 
-
-### HTTP module initialization
+### Require packages
+package require sqlite3
 package require http
 
-
-::http::config -useragent "$urllog_name/$urllog_version"
-if {$http_proxy != 0} {
-	::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
-}
-
-if {$http_tls_support != 0} {
-	package require tls
-	::http::register https 443 [list ::tls::socket -request 1 -require 1 -cadir "/etc/certs/"]
-}
-
 ### Binding initializations
 if {$urllog_search != 0} {
 	bind pub - !urlfind urllog_pub_urlfind
@@ -159,6 +149,24 @@
 set urllog_message "$urllog_name v$urllog_version (C) 2000-2011 ccr/TNSP"
 putlog "$urllog_message"
 
+### HTTP module initialization
+::http::config -useragent "$urllog_name/$urllog_version"
+if {$http_proxy != 0} {
+	::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
+}
+
+if {$http_tls_support != 0} {
+	package require tls
+	::http::register https 443 [list ::tls::socket -request 1 -require 1 -cadir "/etc/certs/"]
+}
+
+### SQLite database initialization
+if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} {
+	putlog " Could not open SQLite3 database '$urllog_db_file': $uerrmsg"
+	exit 2
+}
+
+
 if {$http_proxy != 0} {
 	putlog " (Using proxy $http_proxy_host:$http_proxy_port)"
 }
@@ -175,7 +183,6 @@
 	putlog " (Search commands enabled)"
 }
 
-
 #-------------------------------------------------------------------------
 ### Utility functions
 proc urllog_log {arg} {
@@ -239,9 +246,14 @@
 }
 
 
+proc urllog_escape { str } {
+	return [string map {' ''} $str]
+}
+
 #-------------------------------------------------------------------------
 proc urllog_get_short {utime} {
-	global urllog_shorturl urllog_shorturl_prefix
+	global urllog_shorturl urllog_shorturl_prefix
+
 	set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
 	set ulen [string length $ustr]
 
@@ -266,46 +278,33 @@
 
 #-------------------------------------------------------------------------
 proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} {
-	global urlmsg_alreadyknown urllog_file urllog_shorturl
+	global urldb urlmsg_alreadyknown urllog_shorturl
 
  	### Let's check if we already know the URL
-	set fd [open $urllog_file a+]
-	close $fd
-
-	set fd [open $urllog_file r]
-	set urlID -1
-	while {![eof $fd]} {
-		set qitems [split [gets $fd] " "]
-		set qindex [lindex $qitems 4]
-		if {$qindex != "" && $qindex > $urlID} {
-			set urlID $qindex
+ 	set tmpURL [urllog_escape $urlStr]
+	urldb eval "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls WHERE url='$tmpURL'" {
+		urllog_log "URL said by $urlNick ($urlStr) already known"
+		if {$urllog_shorturl != 0} {
+			set qstr "[urllog_get_short $uid] "
+		} else {
+			set qstr ""
 		}
-		if {[lindex $qitems 0] == $urlStr} {
-			urllog_log "URL said by $urlNick ($urlStr) already known"
-			if {$urllog_shorturl != 0} {
-				set qstr "[urllog_get_short $urlID] "
-			} else {
-				set qstr ""
-			}
-			append qstr "([lindex $qitems 2]@[urllog_ctime [lindex $qitems 1]])"
-			if {[string length $urlTitle] > 0} {
-				set qstr "$urlmsg_alreadyknown - '$urlTitle' $qstr"
-			} else {
-				set qstr "$urlmsg_alreadyknown $qstr"
-			}
-			urllog_verb_msg $urlNick $urlChan $qstr
-			return 0
+		append qstr "($uuser@[urllog_ctime $utime])"
+		if {[string length $urlTitle] > 0} {
+			set qstr "$urlmsg_alreadyknown - '$urlTitle' $qstr"
+		} else {
+			set qstr "$urlmsg_alreadyknown $qstr"
 		}
+		urllog_verb_msg $urlNick $urlChan $qstr
+		return 0
 	}
-	close $fd
-
 
 	### OK, the URL was not already known - thus we add it
-	incr urlID
-	set urlTime [unixtime]
-	set fd [open $urllog_file a+]
-	puts $fd "$urlStr $urlTime $urlNick ($urlHost) $urlID"
-	close $fd
+	set sql "INSERT INTO urls (utime,url,user,host) VALUES ([unixtime], '[urllog_escape $urlStr]', '[urllog_escape $urlNick]', '[urllog_escape $urlHost]')"
+	if {[catch {urldb eval $sql} uerrmsg]} {
+		urllog_log "$uerrmsg on SQL:\n$sql"
+		return 0
+	}
 	urllog_log "Added URL ($urlNick@$urlChan): $urlStr"
 
 
@@ -338,9 +337,9 @@
 
 #-------------------------------------------------------------------------
 proc urllog_checkurl {urlStr urlNick urlHost urlChan} {
-	global botnick urllog_html urllog_tlds urllog_check urllog_file
-	global urlmsg_nosuchhost urlmsg_ioerror urlmsg_timeout urlmsg_errorgettingdoc
-	global urllog_httprep urllog_shorturl_prefix urllog_shorturl urllog_encoding
+	global urllog_tlds urllog_check urlmsg_nosuchhost urlmsg_ioerror
+	global urlmsg_timeout urlmsg_errorgettingdoc urllog_httprep
+	global urllog_shorturl_prefix urllog_shorturl urllog_encoding
 
 	### Print status to bot's log
 	urllog_log "$urlStr ($urlNick@$urlChan)"
@@ -524,56 +523,9 @@
 
 
 #-------------------------------------------------------------------------
-### Find from database according to patterns, up to imax results.
-proc urllog_urlfind {ipatterns imax} {
-	global urllog_file
-
-	### Search the database for pattern
-	### Clear the count, open the URL logfile
-	set iresults {}
-	set nresults 0
-	set fd [open $urllog_file r]
-
-	### Start searching...
-	while {![eof $fd]} {
-
-		# Get one URL for inspection
-		gets $fd foo
-		set irecord [split [string tolower $foo] " "]
-		set iurl [lindex $irecord 0]
-		set iname [lindex $irecord 2]
-
-		# Match with all given patterns and rules
-		set imatched 1
-		foreach ipattern $ipatterns {
-			set foob [split [string tolower $ipattern] " "]
-			set ftoken [lindex $foob 0]
-			set fparam [lindex $foob 1]
-			set fmatch [string match $fparam $iurl]
-			if {$ftoken == "+" && $fmatch == 0} { set imatched 0 }
-			if {$ftoken == "-" && $fmatch == 1} { set imatched 0 }
-			if {$ftoken == "%" && [string match $fparam $iname] == 0} { set imatched 0 }
-		}
-
-		# If the patterns matched, add to the results list
-		if {$imatched == 1 && $foo != ""} {
-			incr nresults
-			lappend iresults $foo
-		}
-	}
-
-	# Close file
-	close $fd
-
-	# Take only last imax results
-	return [lrange $iresults [expr $nresults - $imax] $nresults]
-}
-
-
-#-------------------------------------------------------------------------
 ### Parse arguments, find and show the results
 proc urllog_find {unick uhand uchan utext upublic} {
-	global botnick urllog_name urllog_version urllog_shorturl
+	global urllog_name urllog_version urllog_shorturl urldb
 	global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch
 
 	### Parse the given command