annotate convert_urllog_db.tcl @ 178:9b8ec700ede4

Clean up the weather data parser backend a bit.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 03 Jun 2014 15:10:47 +0300
parents 7b03971c6d28
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 #!/usr/bin/tclsh
22
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
2 # TCL script for converting URLLog v1.x flat file
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
3 # format database to SQLite3 database.
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
4 #
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
5 # Written by Matti 'ccr' Hamalainen <ccr@tnsp.org>
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
6 # (C) Copyright 2011 Tecnic Software productions (TNSP)
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
7 #
10
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
8 package require sqlite3
34
dccd6c47f9cd Use special 'source' statement to include util_convert.tcl from same directory where the script calling it resides.
Matti Hamalainen <ccr@tnsp.org>
parents: 33
diff changeset
9 source [file dirname [info script]]/util_convert.tcl
10
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
10
22
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
11 ### Check commandline arguments
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
12 if {$argc != 2} {
63
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
13 puts "Usage: $argv0 <input_flat_file_db> <output_sqlite_db_file>"
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
14 exit 0
22
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
15 }
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
16
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
17 set db_input [lindex $argv 0]
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
18 set db_output [lindex $argv 1]
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
19 set db_table "urls"
10
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
20
22
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
21 ### Ask for confirmation
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
22 puts "Conversion of '$db_input' to SQLite database '$db_output', to table '$db_table'."
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
23 puts "NOTICE! This WILL destroy the current data in table '$db_table'!"
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
24
33
8a6bfcf1f57b Use the functions that were implemented in the utility library. Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 23
diff changeset
25 if {![confirm_yesno "Proceed"]} {
63
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
26 exit 0
22
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
27 }
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
28
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
29 ### Open flatfile for reading
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
30 if {[catch {set fd [open $db_input r]} uerrmsg]} {
63
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
31 puts "Could not open '$db_input' for reading: $uerrmsg"
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
32 exit 1
10
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
33 }
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
34
33
8a6bfcf1f57b Use the functions that were implemented in the utility library. Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 23
diff changeset
35 ### Open SQLite database, drop old table, create new
8a6bfcf1f57b Use the functions that were implemented in the utility library. Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 23
diff changeset
36 open_db $db_output
8a6bfcf1f57b Use the functions that were implemented in the utility library. Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 23
diff changeset
37 drop_table "urls"
8a6bfcf1f57b Use the functions that were implemented in the utility library. Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 23
diff changeset
38 create_table_urls
10
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
39
22
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
40 ### Detect URL database version
11
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
41 puts -nonewline "Detecting database version: "
10
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
42 set nline 0
11
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
43 set minentries 9999
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
44 set maxentries 0
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
45 while {![eof $fd]} {
63
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
46 incr nline
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
47 set line [gets $fd]
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
48 if {$line != ""} {
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
49 set items [split $line " "]
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
50 set tmp [llength $items]
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
51 if {$tmp > $maxentries} { set maxentries $tmp }
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
52 if {$tmp < $minentries} { set minentries $tmp }
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
53 }
11
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
54 }
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
55
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
56 if {$maxentries != 5 || $maxentries != $minentries} {
63
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
57 puts "old / variable"
11
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
58 } else {
63
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
59 puts "new"
11
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
60 }
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
61
22
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
62 ### Show some information
11
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
63 puts "Database contains $nline records, with $minentries / $maxentries entries."
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
64
22
7feaff7cc22e Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 11
diff changeset
65 ### Begin conversion
11
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
66 puts -nonewline "Converting database, please wait ... round #1 "
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
67 set nline 0
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
68 seek $fd 0 start
10
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
69 while {![eof $fd]} {
63
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
70 incr nline
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
71 set line [gets $fd]
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
72 set items [split $line " "]
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
73 if {$line != ""} {
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
74 set host [lindex $items 3]
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
75 if {[regexp {^\((.+)\)$} $host ures uhost]} {
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
76 set host $uhost
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
77 }
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
78 set uid [lindex $items 4]
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
79 if {$uid != ""} {
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
80 set sql "INSERT INTO $db_table (id,utime,url,user,host) VALUES ($uid, [lindex $items 1], '[lescape $items 0]', '[lescape $items 2]', '[escape $host]')"
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
81 if {[catch {dbh eval $sql} uerrmsg]} {
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
82 puts "\nError ($nline): $uerrmsg on:\n$sql"
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
83 exit 15
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
84 }
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
85 }
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
86 }
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
87 if {[expr $nline % 10] == 1} {
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
88 puts -nonewline "."
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
89 flush stdout
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
90 }
10
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
91 }
11
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
92 puts "OK"
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
93
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
94 puts -nonewline "\nRound #2 "
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
95 set nline 0
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
96 seek $fd 0 start
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
97 while {![eof $fd]} {
63
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
98 incr nline
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
99 set line [gets $fd]
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
100 set items [split $line " "]
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
101 if {$line != ""} {
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
102 set host [lindex $items 3]
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
103 if {[regexp {^\((.+)\)$} $host ures uhost]} {
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
104 set host $uhost
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
105 }
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
106 set uid [lindex $items 4]
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
107 if {$uid == ""} {
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
108 set sql "INSERT INTO $db_table (utime,url,user,host) VALUES ([lindex $items 1], '[lescape $items 0]', '[lescape $items 2]', '[escape $host]')"
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
109 if {[catch {dbh eval $sql} uerrmsg]} {
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
110 puts "\nError ($nline): $uerrmsg on:\n$sql"
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
111 exit 15
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
112 }
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
113 }
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
114 }
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
115 if {[expr $nline % 10] == 1} {
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
116 puts -nonewline "."
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
117 flush stdout
7b03971c6d28 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 34
diff changeset
118 }
11
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
119 }
06f5e5b25930 Make the database conversion 2-staged to support old format flatfile database inputs. Add detection for old format.
Matti Hamalainen <ccr@tnsp.org>
parents: 10
diff changeset
120
33
8a6bfcf1f57b Use the functions that were implemented in the utility library. Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 23
diff changeset
121 dbh close
10
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
122 close $fd
fbf718c24de4 Add script for converting URLLog text database to SQLite.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
123
33
8a6bfcf1f57b Use the functions that were implemented in the utility library. Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 23
diff changeset
124 puts "DONE."