view convert_urllog_db.tcl @ 178:9b8ec700ede4

Clean up the weather data parser backend a bit.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 03 Jun 2014 15:10:47 +0300
parents 7b03971c6d28
children
line wrap: on
line source

#!/usr/bin/tclsh
# TCL script for converting URLLog v1.x flat file
# format database to SQLite3 database.
#
# Written by Matti 'ccr' Hamalainen <ccr@tnsp.org>
# (C) Copyright 2011 Tecnic Software productions (TNSP)
#
package require sqlite3
source [file dirname [info script]]/util_convert.tcl

### Check commandline arguments
# Exactly two arguments are required: the flat file to read and the
# SQLite database file to write.
if {$argc != 2} {
  puts "Usage: $argv0 <input_flat_file_db> <output_sqlite_db_file>"
  # Wrong invocation is a failure; signal it through the exit status so
  # that calling scripts do not mistake it for a completed conversion.
  exit 1
}

set db_input [lindex $argv 0]
set db_output [lindex $argv 1]
# Name of the destination table, used in the prompts and INSERT statements.
set db_table "urls"

### Ask for confirmation
# Tell the operator what is about to happen and that the destination
# table's current contents will be lost.
puts "Conversion of '$db_input' to SQLite database '$db_output', to table '$db_table'."
puts "NOTICE! This WILL destroy the current data in table '$db_table'!"

# confirm_yesno is provided by util_convert.tcl; a false result aborts
# the script with exit status 0.
set proceed [confirm_yesno "Proceed"]
if {!$proceed} {
  exit 0
}

### Open flatfile for reading
# On failure, report the reason given by open and exit with status 1.
if {[catch {set fd [open $db_input r]} uerrmsg]} {
  puts "Could not open '$db_input' for reading: $uerrmsg"
  exit 1
}

### Open SQLite database, drop old table, create new
# open_db, drop_table and create_table_urls come from util_convert.tcl.
open_db $db_output
# Drop the table named by db_table, i.e. the one the confirmation prompt
# announced, instead of repeating the name as a second literal.
drop_table $db_table
create_table_urls

### Detect URL database version
# Read the flat file once. Every non-empty line is split on single spaces
# and the smallest and largest field counts are tracked. The file is
# reported as "new" only when every record has exactly 5 fields.
puts -nonewline "Detecting database version: "
set nline 0
set minentries ""
set maxentries 0
# Let gets report end-of-file itself (-1). Testing eof before the read
# would run the loop body once more after the last line.
while {[gets $fd line] >= 0} {
  # Blank lines are not records; skip them so nline is a record count.
  if {$line eq ""} { continue }
  incr nline
  set tmp [llength [split $line " "]]
  if {$tmp > $maxentries} { set maxentries $tmp }
  if {$minentries eq "" || $tmp < $minentries} { set minentries $tmp }
}

# No records at all: report 0 as the minimum rather than a sentinel value.
if {$minentries eq ""} { set minentries 0 }

if {$maxentries != 5 || $maxentries != $minentries} {
  puts "old / variable"
} else {
  puts "new"
}

### Show some information
puts "Database contains $nline records, with $minentries / $maxentries entries."

### Begin conversion

# convert_pass fd table with_id
#
# Run one pass over the flat file and insert the matching records.
#
#   fd      - open channel of the flat file; rewound to the start here
#   table   - name of the table to insert into
#   with_id - true:  handle only records that carry an ID in field 4 and
#                    insert them with that ID
#             false: handle only records without an ID and leave the id
#                    column out of the INSERT
#
# Each non-empty line is split on single spaces; fields 0..4 are used as
# url, utime, user, host and id. Parentheses wrapping the host field are
# stripped. utime and id are placed into the SQL text as-is, the other
# fields go through lescape/escape from util_convert.tcl.
# NOTE(review): utime and id are not validated before being interpolated;
# this assumes the flat file only holds numeric values there -- confirm.
#
# The whole pass runs inside one transaction so SQLite does not commit
# after every single INSERT. On an SQL error the statement is printed and
# the script exits with status 15; the uncommitted rows of that pass are
# then not kept.
proc convert_pass {fd table with_id} {
  set with_id [expr {$with_id ? 1 : 0}]
  set nline 0
  seek $fd 0 start
  dbh transaction {
    # gets returns -1 at end-of-file, so no extra iteration is run after
    # the last line.
    while {[gets $fd line] >= 0} {
      incr nline
      if {$line ne ""} {
        set items [split $line " "]
        set host [lindex $items 3]
        if {[regexp {^\((.+)\)$} $host ures uhost]} {
          set host $uhost
        }
        set uid [lindex $items 4]
        set has_id [expr {$uid ne ""}]
        if {$has_id == $with_id} {
          if {$with_id} {
            set sql "INSERT INTO $table (id,utime,url,user,host) VALUES ($uid, [lindex $items 1], '[lescape $items 0]', '[lescape $items 2]', '[escape $host]')"
          } else {
            set sql "INSERT INTO $table (utime,url,user,host) VALUES ([lindex $items 1], '[lescape $items 0]', '[lescape $items 2]', '[escape $host]')"
          }
          if {[catch {dbh eval $sql} uerrmsg]} {
            puts "\nError ($nline): $uerrmsg on:\n$sql"
            exit 15
          }
        }
      }
      # Progress indicator: one dot per ten input lines.
      if {$nline % 10 == 1} {
        puts -nonewline "."
        flush stdout
      }
    }
  }
}

# Round 1: records that already have an ID, inserted with that ID.
puts -nonewline "Converting database, please wait ... round #1 "
convert_pass $fd $db_table 1
puts "OK"

# Round 2: records without an ID; the id column is left for SQLite to fill.
puts -nonewline "\nRound #2 "
convert_pass $fd $db_table 0
# Terminate the progress line like round 1 does.
puts "OK"

# Release the input channel and the database handle, then report
# completion.
close $fd
dbh close

puts "DONE."