annotate urllog.tcl @ 289:5067843cee3d

urllog: Move some initialization messages around.
author Matti Hamalainen <ccr@tnsp.org>
date Sun, 25 Jan 2015 15:34:50 +0200
parents d62280f2a9c7
children 54d34d086b47
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 ##########################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
2 #
253
dd30f2eaabd3 urllog: Bump version.
Matti Hamalainen <ccr@tnsp.org>
parents: 252
diff changeset
3 # URLLog v2.4.0 by Matti 'ccr' Hamalainen <ccr@tnsp.org>
250
e706f1cdebb4 urllog: Update copyright.
Matti Hamalainen <ccr@tnsp.org>
parents: 249
diff changeset
4 # (C) Copyright 2000-2015 Tecnic Software productions (TNSP)
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
5 #
113
077c7383f36f urllog: Add line about the script's license.
Matti Hamalainen <ccr@tnsp.org>
parents: 112
diff changeset
6 # This script is freely distributable under GNU GPL (version 2) license.
077c7383f36f urllog: Add line about the script's license.
Matti Hamalainen <ccr@tnsp.org>
parents: 112
diff changeset
7 #
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
8 ##########################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
9 #
50
f69363fc1f61 Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 49
diff changeset
10 # URL-logger script for EggDrop IRC robot, utilizing SQLite3 database
81
17e542b7985a urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 73
diff changeset
11 # This script requires SQLite TCL extension. Under Debian, you need:
17e542b7985a urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 73
diff changeset
12 # tcl8.5 libsqlite3-tcl (and eggdrop eggdrop-data, of course)
50
f69363fc1f61 Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 49
diff changeset
13 #
81
17e542b7985a urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 73
diff changeset
14 # NOTICE! If you are upgrading to URLLog v2.0+ from any 1.x version, you
50
f69363fc1f61 Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 49
diff changeset
15 # may want to run a conversion script against your URL-database file,
f69363fc1f61 Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 49
diff changeset
16 # if you wish to preserve the old data.
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
17 #
50
f69363fc1f61 Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 49
diff changeset
18 # See convert_urllog_db.tcl for more information.
f69363fc1f61 Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 49
diff changeset
19 #
81
17e542b7985a urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 73
diff changeset
20 # If you are doing a fresh install, you will need to create the
50
f69363fc1f61 Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 49
diff changeset
21 # initial SQLite3 database with the required table schemas. You
f69363fc1f61 Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 49
diff changeset
22 # can do that by running: create_urllog_db.tcl
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
23 #
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
24 ##########################################################################
13
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
25
263
f01d60175c44 urllog: Move configuration to external file.
Matti Hamalainen <ccr@tnsp.org>
parents: 260
diff changeset
26 ### The configuration should be in config.urllog in same directory
f01d60175c44 urllog: Move configuration to external file.
Matti Hamalainen <ccr@tnsp.org>
parents: 260
diff changeset
27 ### as this script. Or change the line below to point where ever
f01d60175c44 urllog: Move configuration to external file.
Matti Hamalainen <ccr@tnsp.org>
parents: 260
diff changeset
28 ### you wish. See "config.urllog.example" for an example config file.
f01d60175c44 urllog: Move configuration to external file.
Matti Hamalainen <ccr@tnsp.org>
parents: 260
diff changeset
29 source [file dirname [info script]]/config.urllog
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
30
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
31
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
32 ##########################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
33 # No need to look below this line
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
34 ##########################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
35 set urllog_name "URLLog"
253
dd30f2eaabd3 urllog: Bump version.
Matti Hamalainen <ccr@tnsp.org>
parents: 252
diff changeset
36 set urllog_version "2.4.0"
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
37
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
38 set urllog_tlds [split $urllog_tlds ","]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
39 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"]
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
40
102
5425dc418505 urllog: Entity data is now in UTF-8, but TCL source files are interpreted with current system locale, which may not be UTF-8. We must therefore "convert" the entity mapping string to UTF-8 to be certain of TCL's interpretation of its encoding.
Matti Hamalainen <ccr@tnsp.org>
parents: 101
diff changeset
41
131
b04ecf8bfb15 urllog: Fix some entity translations.
Matti Hamalainen <ccr@tnsp.org>
parents: 129
diff changeset
42 set urllog_ent_str "&#45;|-|&#39;|'|—|-|&rlm;||&#8212;|-|&#8211;|--|&#x202a;||&#x202c;|"
b04ecf8bfb15 urllog: Fix some entity translations.
Matti Hamalainen <ccr@tnsp.org>
parents: 129
diff changeset
43 append urllog_ent_str "|&lrm;||&aring;|å|&Aring;|Å|&eacute;|é|&#58;|:|&nbsp;| "
133
9f290e1ee738 urllog: Oops, bugfix 10L.
Matti Hamalainen <ccr@tnsp.org>
parents: 132
diff changeset
44 append urllog_ent_str "|&#8221;|\"|&#8220;|\"|&laquo;|<<|&raquo;|>>|&quot;|\""
131
b04ecf8bfb15 urllog: Fix some entity translations.
Matti Hamalainen <ccr@tnsp.org>
parents: 129
diff changeset
45 append urllog_ent_str "|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>"
b04ecf8bfb15 urllog: Fix some entity translations.
Matti Hamalainen <ccr@tnsp.org>
parents: 129
diff changeset
46 append urllog_ent_str "|&#228;|ä|&#229;|ö|&mdash;|-|&#039;|'|&ndash;|-|&#034;|\""
b04ecf8bfb15 urllog: Fix some entity translations.
Matti Hamalainen <ccr@tnsp.org>
parents: 129
diff changeset
47 append urllog_ent_str "|&#124;|-|&#8217;|'|&uuml;|ü|&Uuml;|Ü|&bull;|*|&euro;|€"
223
606c2a48b2ce urllog: Add one entity translation.
Matti Hamalainen <ccr@tnsp.org>
parents: 222
diff changeset
48 append urllog_ent_str "|&rdquo;|\"|&#8216;|'"
102
5425dc418505 urllog: Entity data is now in UTF-8, but TCL source files are interpreted with current system locale, which may not be UTF-8. We must therefore "convert" the entity mapping string to UTF-8 to be certain of TCL's interpretation of its encoding.
Matti Hamalainen <ccr@tnsp.org>
parents: 101
diff changeset
49 set urllog_html_ent [split [encoding convertfrom "utf-8" $urllog_ent_str] "|"]
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
50
13
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
51 ### Require packages
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
52 package require sqlite3
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
53 package require http
7
50b52294e93e urllog: Strip &rlm; entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents: 4
diff changeset
54
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
55 ### Binding initializations
219
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
56 bind pub - !urlfind urllog_pub_urlfind
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
57 bind msg - !urlfind urllog_msg_urlfind
249
d98876dd9ee1 urllog: Rename a function.
Matti Hamalainen <ccr@tnsp.org>
parents: 241
diff changeset
58 bind pubm - *.* urllog_check_line
d98876dd9ee1 urllog: Rename a function.
Matti Hamalainen <ccr@tnsp.org>
parents: 241
diff changeset
59 bind topc - *.* urllog_check_line
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
60
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
61
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
62 ### Initialization messages
250
e706f1cdebb4 urllog: Update copyright.
Matti Hamalainen <ccr@tnsp.org>
parents: 249
diff changeset
63 set urllog_message "$urllog_name v$urllog_version (C) 2000-2015 ccr/TNSP"
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
64 putlog "$urllog_message"
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
65
289
5067843cee3d urllog: Move some initialization messages around.
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
66 ### Miscellaneous init messages
5067843cee3d urllog: Move some initialization messages around.
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
67 if {$urllog_extra_checks != 0} {
5067843cee3d urllog: Move some initialization messages around.
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
68 putlog " (Additional URL validity checks enabled)"
5067843cee3d urllog: Move some initialization messages around.
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
69 }
5067843cee3d urllog: Move some initialization messages around.
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
70
5067843cee3d urllog: Move some initialization messages around.
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
71 if {$urllog_verbose != 0} {
5067843cee3d urllog: Move some initialization messages around.
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
72 putlog " (Verbose mode enabled)"
5067843cee3d urllog: Move some initialization messages around.
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
73 }
5067843cee3d urllog: Move some initialization messages around.
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
74
13
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
75 ### HTTP module initialization
269
d62280f2a9c7 urllog: Make user agent string configurable.
Matti Hamalainen <ccr@tnsp.org>
parents: 267
diff changeset
76 if {[info exists http_user_agent] && $http_user_agent != ""} {
d62280f2a9c7 urllog: Make user agent string configurable.
Matti Hamalainen <ccr@tnsp.org>
parents: 267
diff changeset
77 ::http::config -useragent $http_user_agent
d62280f2a9c7 urllog: Make user agent string configurable.
Matti Hamalainen <ccr@tnsp.org>
parents: 267
diff changeset
78 } else {
d62280f2a9c7 urllog: Make user agent string configurable.
Matti Hamalainen <ccr@tnsp.org>
parents: 267
diff changeset
79 ::http::config -useragent "$urllog_name/$urllog_version"
d62280f2a9c7 urllog: Make user agent string configurable.
Matti Hamalainen <ccr@tnsp.org>
parents: 267
diff changeset
80 }
d62280f2a9c7 urllog: Make user agent string configurable.
Matti Hamalainen <ccr@tnsp.org>
parents: 267
diff changeset
81
267
da239a953e24 urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 264
diff changeset
82 if {[info exists http_use_proxy] && $http_use_proxy != 0} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
83 ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
267
da239a953e24 urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 264
diff changeset
84 putlog " (Using proxy $http_proxy_host:$http_proxy_port)"
13
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
85 }
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
86
267
da239a953e24 urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 264
diff changeset
87 if {[info exists http_tls_support] && $http_tls_support != 0} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
88 package require tls
235
059660980388 urllog: Enable TLS, fixes annoying issues where https fails.
Matti Hamalainen <ccr@tnsp.org>
parents: 230
diff changeset
89 ::http::register https 443 [list ::tls::socket -request 1 -require 1 -tls1 1 -cadir $http_tls_cadir]
267
da239a953e24 urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 264
diff changeset
90 putlog " (TLS/SSL support enabled)"
13
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
91 }
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
92
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
93 ### SQLite database initialization
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
94 if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
95 putlog " Could not open SQLite3 database '$urllog_db_file': $uerrmsg"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
96 exit 2
13
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
97 }
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
98
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
99
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
100 #-------------------------------------------------------------------------
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
101 ### Utility functions
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
102 proc urllog_log {arg} {
267
da239a953e24 urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 264
diff changeset
103 global urllog_log_enable urllog_name
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
104
267
da239a953e24 urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 264
diff changeset
105 if {$urllog_log_enable != 0} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
106 putlog "$urllog_name: $arg"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
107 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
108 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
109
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
110
152
cbee8ca52eb8 urllog: Cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents: 150
diff changeset
111 proc urllog_ctime {utime} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
112 if {$utime == "" || $utime == "*"} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
113 set utime 0
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
114 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
115 return [clock format $utime -format "%d.%m.%Y %H:%M"]
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
116 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
117
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
118
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
119 proc urllog_isnumber {uarg} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
120 foreach i [split $uarg {}] {
65
31c8c4f50aa6 urllog: Improve urllog_isnumber function.
Matti Hamalainen <ccr@tnsp.org>
parents: 62
diff changeset
121 if {![string match \[0-9\] $i]} { return 0 }
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
122 }
65
31c8c4f50aa6 urllog: Improve urllog_isnumber function.
Matti Hamalainen <ccr@tnsp.org>
parents: 62
diff changeset
123 return 1
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
124 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
125
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
126
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
127 proc urllog_msg {apublic anick achan amsg} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
128 global urllog_preferredmsg
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
129
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
130 if {$apublic == 1} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
131 putserv "$urllog_preferredmsg $achan :$amsg"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
132 } else {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
133 putserv "$urllog_preferredmsg $anick :$amsg"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
134 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
135 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
136
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
137
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
138 proc urllog_verb_msg {anick achan amsg} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
139 global urllog_verbose
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
140
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
141 if {$urllog_verbose != 0} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
142 urllog_msg 1 $anick $achan $amsg
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
143 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
144 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
145
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
146
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
147 proc urllog_convert_ent {udata} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
148 global urllog_html_ent
115
5db02af76016 urllog: Improve entity conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 114
diff changeset
149 return [string map -nocase $urllog_html_ent [string map $urllog_html_ent $udata]]
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
150 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
151
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
152
13
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
153 proc urllog_escape { str } {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
154 return [string map {' ''} $str]
13
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
155 }
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
156
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
157
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
158 proc urllog_sanitize_encoding {uencoding} {
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
159 regsub -- "^\[a-z\]\[a-z\]_\[A-Z\]\[A-Z\]\." $uencoding "" uencoding
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
160 set uencoding [string tolower $uencoding]
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
161 regsub -- "^iso-" $uencoding "iso" uencoding
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
162 return $uencoding
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
163 }
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
164
121
bec98a9f8695 Convert the title encoding when outputting to channel.
Matti Hamalainen <ccr@tnsp.org>
parents: 120
diff changeset
165 proc urllog_clean_title {utitle} {
bec98a9f8695 Convert the title encoding when outputting to channel.
Matti Hamalainen <ccr@tnsp.org>
parents: 120
diff changeset
166 if {[catch {set utitle [encoding convertto "iso8859-15" $utitle]} cerrmsg]} {
bec98a9f8695 Convert the title encoding when outputting to channel.
Matti Hamalainen <ccr@tnsp.org>
parents: 120
diff changeset
167 putlog "Could not convert title encoding: $cerrmsg"
bec98a9f8695 Convert the title encoding when outputting to channel.
Matti Hamalainen <ccr@tnsp.org>
parents: 120
diff changeset
168 }
bec98a9f8695 Convert the title encoding when outputting to channel.
Matti Hamalainen <ccr@tnsp.org>
parents: 120
diff changeset
169 return $utitle
bec98a9f8695 Convert the title encoding when outputting to channel.
Matti Hamalainen <ccr@tnsp.org>
parents: 120
diff changeset
170 }
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
171
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
172 #-------------------------------------------------------------------------
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
173 set urllog_shorturl_str "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
13
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
174
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
175 proc urllog_get_short {utime} {
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
176 global urllog_shorturl_prefix urllog_shorturl_str
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
177
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
178 set ulen [string length $urllog_shorturl_str]
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
179
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
180 set u1 [expr $utime / ($ulen * $ulen)]
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
181 set utmp [expr $utime % ($ulen * $ulen)]
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
182 set u2 [expr $utmp / $ulen]
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
183 set u3 [expr $utmp % $ulen]
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
184
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
185 return "\[ $urllog_shorturl_prefix[string index $urllog_shorturl_str $u1][string index $urllog_shorturl_str $u2][string index $urllog_shorturl_str $u3] \]"
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
186 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
187
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
188
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
189 #-------------------------------------------------------------------------
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
190 proc urllog_chop_url {url} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
191 global urllog_shorturl_orig
68
3762c621d1c3 urllog: Cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents: 65
diff changeset
192
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
193 if {[string length $url] > $urllog_shorturl_orig} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
194 return "[string range $url 0 $urllog_shorturl_orig]..."
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
195 } else {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
196 return $url
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
197 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
198 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
199
241
669842725e2f Cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents: 240
diff changeset
200
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
201 #-------------------------------------------------------------------------
83
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
202 proc urllog_exists {urlStr urlNick urlHost urlChan} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
203 global urldb urlmsg_alreadyknown urllog_shorturl
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
204
83
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
205 set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE url='[urllog_escape $urlStr]'"
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
206 urldb eval $usql {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
207 urllog_log "URL said by $urlNick ($urlStr) already known"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
208 if {$urllog_shorturl != 0} {
83
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
209 set qstr "[urllog_get_short $uid] "
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
210 } else {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
211 set qstr ""
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
212 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
213 append qstr "($uuser/$uchan@[urllog_ctime $utime])"
83
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
214 if {[string length $utitle] > 0} {
121
bec98a9f8695 Convert the title encoding when outputting to channel.
Matti Hamalainen <ccr@tnsp.org>
parents: 120
diff changeset
215 set qstr "$urlmsg_alreadyknown - '[urllog_clean_title $utitle]' $qstr"
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
216 } else {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
217 set qstr "$urlmsg_alreadyknown $qstr"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
218 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
219 urllog_verb_msg $urlNick $urlChan $qstr
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
220 return 0
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
221 }
83
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
222 return 1
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
223 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
224
18
1e2232135354 More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents: 13
diff changeset
225
83
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
226 #-------------------------------------------------------------------------
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
227 proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} {
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
228 global urldb urllog_shorturl
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
229
93
4e02c0219afe urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents: 92
diff changeset
230 if {$urlTitle == ""} {
4e02c0219afe urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents: 92
diff changeset
231 set uins "NULL"
4e02c0219afe urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents: 92
diff changeset
232 } else {
4e02c0219afe urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents: 92
diff changeset
233 set uins "'[urllog_escape $urlTitle]'"
4e02c0219afe urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents: 92
diff changeset
234 }
4e02c0219afe urllog: Insert NULL into title column when we didn't get a title.
Matti Hamalainen <ccr@tnsp.org>
parents: 92
diff changeset
235 set usql "INSERT INTO urls (utime,url,user,host,chan,title) VALUES ([unixtime], '[urllog_escape $urlStr]', '[urllog_escape $urlNick]', '[urllog_escape $urlHost]', '[urllog_escape $urlChan]', $uins)"
83
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
236 if {[catch {urldb eval $usql} uerrmsg]} {
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
237 urllog_log "$uerrmsg on SQL:\n$usql"
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
238 return 0
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
239 }
82
1bbc79f41a1c urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents: 81
diff changeset
240 set uid [urldb last_insert_rowid]
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
241 urllog_log "Added URL ($urlNick@$urlChan): $urlStr"
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
242
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
243
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
244 ### Let's say something, to confirm that everything went well.
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
245 if {$urllog_shorturl != 0} {
82
1bbc79f41a1c urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents: 81
diff changeset
246 set qstr "[urllog_get_short $uid] "
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
247 } else {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
248 set qstr ""
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
249 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
250 if {[string length $urlTitle] > 0} {
121
bec98a9f8695 Convert the title encoding when outputting to channel.
Matti Hamalainen <ccr@tnsp.org>
parents: 120
diff changeset
251 urllog_verb_msg $urlNick $urlChan "'[urllog_clean_title $urlTitle]' ([urllog_chop_url $urlStr]) $qstr"
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
252 } else {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
253 urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
254 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
255
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
256 return 1
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
257 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
258
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
259
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
260 #-------------------------------------------------------------------------
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
261 proc urllog_dorequest { urlNick urlChan urlStr urlStatus urlSCode urlCode urlData urlMeta } {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
262 global urlmsg_ioerror urlmsg_timeout urlmsg_errorgettingdoc
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
263
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
264 upvar 1 $urlStatus ustatus
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
265 upvar 1 $urlSCode uscode
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
266 upvar 1 $urlCode ucode
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
267 upvar 1 $urlData udata
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
268 upvar 1 $urlMeta umeta
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
269
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
270 if {[catch {set utoken [::http::geturl $urlStr -timeout 6000 -headers {Accept-Encoding identity}]} uerrmsg]} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
271 urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
272 urllog_log "HTTP request failed: $uerrmsg"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
273 return 0
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
274 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
275
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
276 set ustatus [::http::status $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
277 if {$ustatus == "timeout"} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
278 urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
279 urllog_log "HTTP request timed out ($urlStr)"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
280 return 0
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
281 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
282
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
283 if {$ustatus != "ok"} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
284 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::error $utoken])"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
285 urllog_log "Error in HTTP transaction: [::http::error $utoken] ($urlStr)"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
286 return 0
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
287 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
288
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
289 set ustatus [::http::status $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
290 set uscode [::http::code $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
291 set ucode [::http::ncode $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
292 set udata [::http::data $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
293 array set umeta [::http::meta $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
294 ::http::cleanup $utoken
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
295
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
296 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
297 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
298
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
299 #-------------------------------------------------------------------------
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
300 proc urllog_validate_url { urlNick urlChan urlMStr urlMProto } {
267
da239a953e24 urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 264
diff changeset
301 global urllog_tlds urllog_extra_checks urlmsg_nosuchhost urllog_httprep
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
302 global urllog_shorturl_prefix urllog_shorturl
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
303 upvar 1 $urlMStr urlStr
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
304 upvar 1 $urlMProto urlProto
3
8003090caa35 Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
305
96
e5a6c27be365 urllog: Comments and cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents: 95
diff changeset
306 ### Try to guess the URL protocol component (if it is missing)
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
307 set u_checktld 1
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
308 if {[string match "*www.*" $urlStr] && ![string match "http://*" $urlStr] && ![string match "https://*" $urlStr]} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
309 set urlStr "http://$urlStr"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
310 } elseif {[string match "*ftp.*" $urlStr] && ![string match "ftp://*" $urlStr]} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
311 set urlStr "ftp://$urlStr"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
312 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
313
95
687bdd74dfac urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents: 93
diff changeset
314 ### Handle URLs that have an IPv4-address
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
315 if {[regexp "(\[a-z\]+)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr u_match urlProto ni1 ni2 ni3 ni4]} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
316 # Check if the IP is on local network
92
f6f4595856ff urllog: Cosmetics. Remove useless parenthesis.
Matti Hamalainen <ccr@tnsp.org>
parents: 91
diff changeset
317 if {$ni1 == 127 || $ni1 == 10 || ($ni1 == 192 && $ni2 == 168) || $ni1 == 0} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
318 urllog_log "URL pointing to local or invalid network, ignored ($urlStr)."
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
319 return 0
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
320 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
321 # Skip TLD check for URLs with IP address
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
322 set u_checktld 0
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
323 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
324
96
e5a6c27be365 urllog: Comments and cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents: 95
diff changeset
325 ### Check now if we have an ShortURL here ...
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
326 if {[string match "$urllog_shorturl_prefix*" $urlStr]} {
98
fbbe7ee40e2f urllog: Improve one informational / error message.
Matti Hamalainen <ccr@tnsp.org>
parents: 97
diff changeset
327 urllog_log "Ignoring ShortURL from $urlNick: $urlStr"
252
eb2fce89b8ab urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents: 251
diff changeset
328 # set uud ""
eb2fce89b8ab urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents: 251
diff changeset
329 # set usql "SELECT id AS uid, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE utime=$uud"
eb2fce89b8ab urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents: 251
diff changeset
330 # urldb eval $usql {
eb2fce89b8ab urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents: 251
diff changeset
331 # urllog_verb_msg $urlNick $urlChan "'$utitle' - $uurl"
eb2fce89b8ab urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents: 251
diff changeset
332 # return 1
eb2fce89b8ab urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents: 251
diff changeset
333 # }
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
334 return 0
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
335 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
336
95
687bdd74dfac urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents: 93
diff changeset
337 ### Get URL protocol component
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
338 set urlProto ""
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
339 regexp "(\[a-z\]+)://" $urlStr u_match urlProto
95
687bdd74dfac urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents: 93
diff changeset
340
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
341 ### Check the PORT (if the ":" is there)
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
342 set u_record [split $urlStr "/"]
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
343 set u_hostname [lindex $u_record 2]
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
344 set u_port [lindex [split $u_hostname ":"] end]
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
345
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
346 if {![urllog_isnumber $u_port] && $u_port != "" && $u_port != $u_hostname} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
347 urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $u_port"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
348 return 0
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
349 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
350
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
351 ### Is it a http or ftp url? (FIX ME!)
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
352 if {$urlProto != "http" && $urlProto != "https" && $urlProto != "ftp"} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
353 urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED protocol class ($urlProto)."
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
354 return 0
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
355 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
356
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
357 ### Check the Top Level Domain (TLD) validity
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
358 if {$u_checktld != 0} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
359 set u_sane [lindex [split $u_hostname "."] end]
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
360 set u_tld [lindex [split $u_sane ":"] 0]
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
361 set u_found 0
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
362
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
363 if {[string length $u_tld] == 2} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
364 # Assume all 2-letter domains to be valid :)
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
365 set u_found 1
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
366 } else {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
367 # Check our list of known TLDs
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
368 foreach itld $urllog_tlds {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
369 if {[string match $itld $u_tld]} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
370 set u_found 1
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
371 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
372 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
373 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
374
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
375 if {$u_found == 0} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
376 urllog_log "Broken URL from $urlNick: ($urlStr) illegal TLD: $u_tld."
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
377 return 0
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
378 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
379 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
380
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
381 set urlStr [string map $urllog_httprep $urlStr]
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
382 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
383 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
384
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
385
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
386 #-------------------------------------------------------------------------
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
387 proc urllog_check_url {urlStr urlNick urlHost urlChan} {
267
da239a953e24 urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 264
diff changeset
388 global urllog_encoding http_tls_support urlmsg_errorgettingdoc urllog_extra_checks
3
8003090caa35 Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
389
91
6f4bfd8e9447 urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents: 90
diff changeset
390 ### Does the URL already exist?
6f4bfd8e9447 urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents: 90
diff changeset
391 if {![urllog_exists $urlStr $urlNick $urlHost $urlChan]} {
6f4bfd8e9447 urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents: 90
diff changeset
392 return 1
6f4bfd8e9447 urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents: 90
diff changeset
393 }
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
394
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
395 ### Validate URL compoments, etc.
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
396 set u_proto ""
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
397 if {![urllog_validate_url $urlNick $urlChan urlStr u_proto]} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
398 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
399 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
400
267
da239a953e24 urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 264
diff changeset
401 ### Do we perform additional checks?
da239a953e24 urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 264
diff changeset
402 if {$urllog_extra_checks == 0 || !(($http_tls_support != 0 && $u_proto == "https") || $u_proto == "http")} {
230
3e3756b113a1 urllog: Cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents: 229
diff changeset
403 # No optional checks, or it's not http/https.
3e3756b113a1 urllog: Cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents: 229
diff changeset
404 # Just add the URL, if it does not exist already.
91
6f4bfd8e9447 urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents: 90
diff changeset
405 urllog_addurl $urlStr $urlNick $urlHost $urlChan ""
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
406 return 1
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
407 }
7
50b52294e93e urllog: Strip &rlm; entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents: 4
diff changeset
408
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
409 ### Does the document pointed by the URL exist?
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
410 if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
411 return 1
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
412 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
413
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
414 ### Handle redirects
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
415 if {$ucode >= 301 && $ucode <= 302} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
416 set nurlStr $umeta(Location)
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
417 urllog_log "Redirection: $urlStr -> $nurlStr"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
418 set urlStr $nurlStr
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
419
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
420 if {![urllog_validate_url $urlNick $urlChan urlStr urlProto]} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
421 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
422 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
423
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
424 if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
425 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
426 }
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
427 }
3
8003090caa35 Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
428
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
429 ### Handle 2nd level redirects
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
430 if {$ucode >= 301 && $ucode <= 302} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
431 set nurlStr $umeta(Location)
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
432 urllog_log "Redirection #2: $urlStr -> $nurlStr"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
433 set urlStr $nurlStr
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
434
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
435 if {![urllog_validate_url $urlNick $urlChan urlStr urlProto]} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
436 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
437 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
438
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
439 if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
440 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
441 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
442 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
443
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
444 # Final document
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
445 if {$ucode >= 200 && $ucode <= 205} {
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
446 set uenc_doc ""
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
447 set uenc_http ""
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
448 set uencoding ""
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
449
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
450 # Get information about specified character encodings
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
451 if {[info exists umeta(Content-Type)] && [regexp -nocase {charset\s*=\s*([a-z0-9._-]+)} $umeta(Content-Type) umatches uenc_http]} {
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
452 # Found character set encoding information in HTTP headers
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
453 }
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
454
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
455 if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/\?>" $udata umatches uenc_doc]} {
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
456 # Found old style HTML meta tag with character set information
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
457 } elseif {[regexp -nocase -- "<meta.\*\?charset=\"(\[^\"\]*)\".\*\?/\?>" $udata umatches uenc_doc]} {
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
458 # Found HTML5 style meta tag with character set information
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
459 }
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
460
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
461 # Make sanitized versions of the encoding strings
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
462 set uenc_http2 [urllog_sanitize_encoding $uenc_http]
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
463 set uenc_doc2 [urllog_sanitize_encoding $uenc_doc]
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
464
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
465 # KLUDGE!
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
466 set uencoding $uenc_http2
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
467
210
52cadf5a12b6 urllog: Disable some debug logging.
Matti Hamalainen <ccr@tnsp.org>
parents: 209
diff changeset
468 # putlog "got charsets : http='$uenc_http', doc='$uenc_doc' / sanitized http='$uenc_http2', doc='$uenc_doc2'"
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
469
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
470
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
471 # Check if the document has specified encoding
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
472 if {$uenc_doc != ""} {
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
473 # Does it differ from what HTTP says?
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
474 if {$uenc_http != "" && $uenc_doc != $uenc_http && $uenc_doc2 != $uenc_http2} {
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
475 # Yes, we will try reconverting
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
476 set uencoding $uenc_doc2
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
477 }
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
478 } elseif {$uenc_http == ""} {
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
479 # If _NO_ known encoding of any kind, assume the default of iso8859-1
86
4c2b6482c08c urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 84
diff changeset
480 set uencoding "iso8859-1"
4c2b6482c08c urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 84
diff changeset
481 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
482
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
483 # Get the document title, if any
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
484 set urlTitle ""
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
485 if {[regexp -nocase -- "<title>(.\*\?)</title>" $udata umatches urlTitle]} {
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
486 # If character set conversion is required, do it now
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
487 if {$uencoding != ""} {
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
488 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} {
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
489 urllog_log "Error in charset conversion: $cerrmsg"
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
490 }
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
491 }
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
492
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
493 # Convert some HTML entities to plaintext and do some cleanup
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
494 set utmp [urllog_convert_ent $urlTitle]
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
495 regsub -all "\r|\n|\t" $utmp " " utmp
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
496 regsub -all " *" $utmp " " utmp
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
497 set urlTitle [string trim $utmp]
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
498 }
3
8003090caa35 Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
499
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
500 # Rasiatube hack
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
501 if {[string match "*/rasiatube/view*" $urlStr]} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
502 set rasia 0
118
e5f2961a6145 urllog: Improve rasiatube URL de-mangling.
Matti Hamalainen <ccr@tnsp.org>
parents: 117
diff changeset
503 if {[regexp -nocase -- "<link rel=\"video_src\"\.\*\?file=(http://\[^&\]+)&" $udata umatches utmp]} {
e5f2961a6145 urllog: Improve rasiatube URL de-mangling.
Matti Hamalainen <ccr@tnsp.org>
parents: 117
diff changeset
504 regsub -all "\/v\/" $utmp "\/watch\?v=" urlStr
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
505 set rasia 1
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
506 } else {
118
e5f2961a6145 urllog: Improve rasiatube URL de-mangling.
Matti Hamalainen <ccr@tnsp.org>
parents: 117
diff changeset
507 if {[regexp -nocase -- "SWFObject.\"(\[^\"\]+)\", *\"flashvideo" $udata umatches utmp]} {
e5f2961a6145 urllog: Improve rasiatube URL de-mangling.
Matti Hamalainen <ccr@tnsp.org>
parents: 117
diff changeset
508 regsub "http:\/\/www.dailymotion.com\/swf\/" $utmp "http:\/\/www.dailymotion.com\/video\/" urlStr
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
509 set rasia 1
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
510 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
511 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
512 if {$rasia != 0} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
513 urllog_log "RasiaTube mangler: $urlStr"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
514 urllog_verb_msg $urlNick $urlChan "Korjataan haiseva rasiatube-linkki: $urlStr"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
515 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
516 }
3
8003090caa35 Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
517
83
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
518 # Check if the URL already exists, just in case we had some redirects
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
519 if {[urllog_exists $urlStr $urlNick $urlHost $urlChan]} {
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
520 urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
521 }
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
522 return 1
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
523 } else {
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
524 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($ucode)"
224
aaf433ab696a urllog: Improve error messages a bit.
Matti Hamalainen <ccr@tnsp.org>
parents: 223
diff changeset
525 urllog_log "Error fetching document: status=$ustatus, code=$ucode, scode=$uscode, url=$urlStr"
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
526 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
527 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
528
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
529
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
530 #-------------------------------------------------------------------------
219
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
531
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
532
249
d98876dd9ee1 urllog: Rename a function.
Matti Hamalainen <ccr@tnsp.org>
parents: 241
diff changeset
533 proc urllog_check_line {unick uhost uhand uchan utext} {
219
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
534 global urllog_log_channels
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
535
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
536 ### Check the nick
87
97c56d1e9ce2 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 86
diff changeset
537 if {$unick == "*"} {
249
d98876dd9ee1 urllog: Rename a function.
Matti Hamalainen <ccr@tnsp.org>
parents: 241
diff changeset
538 urllog_log "urllog_check_line: Nick was wc, this should not happen."
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
539 return 0
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
540 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
541
219
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
542 ### Check the channel
229
c764d1706abf urllog: Oops, a bugfix.
Matti Hamalainen <ccr@tnsp.org>
parents: 227
diff changeset
543 foreach akey [split $urllog_log_channels ";"] {
219
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
544 if {[string match $akey $uchan]} {
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
545 ### Do the URL checking
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
546 foreach str [split $utext " "] {
221
b8bf9d7666b6 urllog: Improve URL / link matching.
Matti Hamalainen <ccr@tnsp.org>
parents: 219
diff changeset
547 if {[regexp "((ftp|http|https)://\[^\[:space:\]\]+|^(www|ftp)\.\[^\[:space:\]\]+)" $str ulink]} {
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
548 urllog_check_url $str $unick $uhost $uchan
219
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
549 }
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
550 }
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
551 return 0
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
552 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
553 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
554
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
555 return 0
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
556 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
557
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
558
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
559 #-------------------------------------------------------------------------
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
560 ### Parse arguments, find and show the results
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
561 proc urllog_find {unick uhand uchan utext upublic} {
62
6428b1bcb34b urllog: Remove some global variable references where they are not used.
Matti Hamalainen <ccr@tnsp.org>
parents: 50
diff changeset
562 global urllog_shorturl urldb
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
563 global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
564
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
565 if {$upublic == 0} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
566 set ulimit 5
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
567 } else {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
568 set ulimit 3
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
569 }
19
9cf22053e5da Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 18
diff changeset
570
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
571 ### Parse the given command
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
572 urllog_log "$unick/$uhand searched URL: $utext"
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
573
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
574 set ftokens [split $utext " "]
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
575 set fpatlist ""
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
576 foreach ftoken $ftokens {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
577 set fprefix [string range $ftoken 0 0]
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
578 set fpattern [string range $ftoken 1 end]
128
0d21b9d1d2b9 urllog: Improve search functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 127
diff changeset
579 set qpattern "'%[urllog_escape $fpattern]%'"
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
580
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
581 if {$fprefix == "-"} {
128
0d21b9d1d2b9 urllog: Improve search functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 127
diff changeset
582 lappend fpatlist "(url NOT LIKE $qpattern OR title NOT LIKE $qpattern)"
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
583 } elseif {$fprefix == "%"} {
128
0d21b9d1d2b9 urllog: Improve search functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 127
diff changeset
584 lappend fpatlist "user LIKE $qpattern"
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
585 } elseif {$fprefix == "@"} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
586 # foo
112
fae3dd7a8b20 urllog: Oops, a typo in variable name. Fixed.
Matti Hamalainen <ccr@tnsp.org>
parents: 111
diff changeset
587 } elseif {$fprefix == "+"} {
128
0d21b9d1d2b9 urllog: Improve search functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 127
diff changeset
588 lappend fpatlist "(url LIKE $qpattern OR title LIKE $qpattern)"
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
589 } else {
128
0d21b9d1d2b9 urllog: Improve search functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 127
diff changeset
590 set qpattern "'%[urllog_escape $ftoken]%'"
0d21b9d1d2b9 urllog: Improve search functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 127
diff changeset
591 lappend fpatlist "(url LIKE $qpattern OR title LIKE $qpattern)"
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
592 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
593 }
19
9cf22053e5da Repair !urlfind functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 18
diff changeset
594
27
6e381916b016 Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents: 20
diff changeset
595 if {[llength $fpatlist] > 0} {
6e381916b016 Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents: 20
diff changeset
596 set fquery "WHERE [join $fpatlist " AND "]"
6e381916b016 Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents: 20
diff changeset
597 } else {
6e381916b016 Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents: 20
diff changeset
598 set fquery ""
6e381916b016 Some fixes in the query mechanisms of QuoteDB and URLLog.
Matti Hamalainen <ccr@tnsp.org>
parents: 20
diff changeset
599 }
68
3762c621d1c3 urllog: Cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents: 65
diff changeset
600
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
601 set iresults 0
82
1bbc79f41a1c urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents: 81
diff changeset
602 set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls $fquery ORDER BY utime DESC LIMIT $ulimit"
68
3762c621d1c3 urllog: Cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents: 65
diff changeset
603 urldb eval $usql {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
604 incr iresults
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
605 set shortURL $uurl
82
1bbc79f41a1c urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents: 81
diff changeset
606 if {$urllog_shorturl != 0 && $uid != ""} {
1bbc79f41a1c urllog: Rename few variables for clarity.
Matti Hamalainen <ccr@tnsp.org>
parents: 81
diff changeset
607 set shortURL "$shortURL [urllog_get_short $uid]"
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
608 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
609 urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[urllog_ctime $utime])"
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
610 }
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
611
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
612 if {$iresults == 0} {
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
613 # If no URLs were found
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
614 urllog_msg $upublic $unick $uchan $urlmsg_nomatch
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
615 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
616
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
617 return 0
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
618 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
619
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
620
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
621 #-------------------------------------------------------------------------
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
622 ### Finding binded functions
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
623 proc urllog_pub_urlfind {unick uhost uhand uchan utext} {
219
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
624 global urllog_search_channels
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
625
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
626 foreach akey [split $urllog_search_channels ";"] {
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
627 if {[string match $akey $uchan]} {
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
628 return [urllog_find $unick $uhand $uchan $utext 1]
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
629 }
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
630 }
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
631 return 0
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
632 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
633
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
634
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
635 proc urllog_msg_urlfind {unick uhost uhand utext} {
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
636 urllog_find $unick $uhand "" $utext 0
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
637 return 0
3
8003090caa35 Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
638 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
639
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
640 # end of script