view convert_urllog_db.tcl @ 22:7feaff7cc22e

Cleanups.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 05 Sep 2011 18:37:33 +0300
parents 06f5e5b25930
children 9ba859d9f931
line wrap: on
line source

#!/usr/bin/tclsh
# TCL script for converting URLLog v1.x flat file
# format database to SQLite3 database.
#
# Written by Matti 'ccr' Hamalainen <ccr@tnsp.org>
# (C) Copyright 2011 Tecnic Software productions (TNSP)
#
package require sqlite3

### Check commandline arguments
if {$argc != 2} {
	puts "Usage: $argv0 <input_flat_file_db> <output_sqlite_db_file>"
	exit 0
}

set db_input [lindex $argv 0]
set db_output [lindex $argv 1]
set db_table "urls"


### Ask for confirmation
puts "Conversion of '$db_input' to SQLite database '$db_output', to table '$db_table'."
puts "NOTICE! This WILL destroy the current data in table '$db_table'!"

puts -nonewline "Proceed \[y/N\]? "
flush stdout
set response [gets stdin]
if {[string tolower $response] != "y"} {
	puts "OK, aborting."
	exit 0
}

### Helper functions
proc escape { str } {
	return [string map {' ''} $str]
}

proc lescape { lst pos } {
	return [escape [lindex $lst $pos]]
}


### Open flatfile for reading
if {[catch {set fd [open $db_input r]} uerrmsg]} {
	puts "Could not open '$db_input' for reading: $uerrmsg"
	exit 1
}

### Open SQLite database
if {[catch {sqlite3 urldb $db_output} uerrmsg]} {
	puts "Could not open SQLite3 database '$db_output': $uerrmsg"
	exit 2
}

### Drop old table
puts "Dropping current '$db_table' table."
if {[catch {urldb eval {DROP TABLE $db_table}} uerrmsg]} {
	puts "Dropping table resulted in error (ignored): $uerrmsg"
}

### Create new
puts "Creating new table '$db_table'."
if {[catch {urldb eval "CREATE TABLE $db_table (id INTEGER PRIMARY KEY AUTOINCREMENT, utime INT, url VARCHAR(2048), user VARCHAR(32), host VARCHAR(256), chan VARCHAR(32))}} uerrmsg]" {
	puts "Error creating table! $uerrmsg"
	exit 3
}

### Detect URL database version
puts -nonewline "Detecting database version: "
set nline 0
set minentries 9999
set maxentries 0
while {![eof $fd]} {
	incr nline
	set line [gets $fd]
	if {$line != ""} {
		set items [split $line " "]
		set tmp [llength $items]
		if {$tmp > $maxentries} { set maxentries $tmp }
		if {$tmp < $minentries} { set minentries $tmp }
	}
}

if {$maxentries != 5 || $maxentries != $minentries} {
	puts "old / variable"
} else {
	puts "new"
}

### Show some information
puts "Database contains $nline records, with $minentries / $maxentries entries."


### Begin conversion
puts -nonewline "Converting database, please wait ... round #1 "
set nline 0
seek $fd 0 start
while {![eof $fd]} {
	incr nline
	set line [gets $fd]
	set items [split $line " "]
	if {$line != ""} {
		set host [lindex $items 3]
		if {[regexp {^\((.+)\)$} $host ures uhost]} {
			set host $uhost
		}
		set uid [lindex $items 4]
		if {$uid != ""} {
			set sql "INSERT INTO $db_table (id,utime,url,user,host) VALUES ($uid, [lindex $items 1], '[lescape $items 0]', '[lescape $items 2]', '[escape $host]')"
			if {[catch {urldb eval $sql} uerrmsg]} {
				puts "\nError ($nline): $uerrmsg on:\n$sql"
				exit 15
			}
		}
	}
	if {[expr $nline % 10] == 1} {
		puts -nonewline "."
		flush stdout
	}
}
puts "OK"

puts -nonewline "\nRound #2 "
set nline 0
seek $fd 0 start
while {![eof $fd]} {
	incr nline
	set line [gets $fd]
	set items [split $line " "]
	if {$line != ""} {
		set host [lindex $items 3]
		if {[regexp {^\((.+)\)$} $host ures uhost]} {
			set host $uhost
		}
		set uid [lindex $items 4]
		if {$uid == ""} {
			set sql "INSERT INTO $db_table (utime,url,user,host) VALUES ([lindex $items 1], '[lescape $items 0]', '[lescape $items 2]', '[escape $host]')"
			if {[catch {urldb eval $sql} uerrmsg]} {
				puts "\nError ($nline): $uerrmsg on:\n$sql"
				exit 15
			}
		}
	}
	if {[expr $nline % 10] == 1} {
		puts -nonewline "."
		flush stdout
	}
}

urldb close
close $fd

puts "OK"
puts "New SQLite3 database is in file '$db_output'"