Mercurial > hg > egg-tcls
annotate urllog.tcl @ 586:23f2c71fdb90
urllog: Bump version.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sun, 24 Jan 2021 23:13:41 +0200 |
parents | a5dc31f5b44e |
children | 2294b73df2cf |
rev | line source |
---|---|
0 | 1 ########################################################################## |
2 # | |
586 | 3 # URLLog v2.6.0 by Matti 'ccr' Hamalainen <ccr@tnsp.org> |
578 | 4 # (C) Copyright 2000-2021 Tecnic Software productions (TNSP) |
0 | 5 # |
113
077c7383f36f
urllog: Add line about the script's license.
Matti Hamalainen <ccr@tnsp.org>
parents:
112
diff
changeset
|
6 # This script is freely distributable under GNU GPL (version 2) license. |
077c7383f36f
urllog: Add line about the script's license.
Matti Hamalainen <ccr@tnsp.org>
parents:
112
diff
changeset
|
7 # |
0 | 8 ########################################################################## |
9 # | |
458
cfbe6acc1d73
urllog: tcl-tls 1.7.x (tested with 1.7.13) is now a requirement. It is
Matti Hamalainen <ccr@tnsp.org>
parents:
457
diff
changeset
|
10 # NOTICE! NOTICE! This script REQUIRES tcl-tls 1.7.13+ if you wish to |
cfbe6acc1d73
urllog: tcl-tls 1.7.x (tested with 1.7.13) is now a requirement. It is
Matti Hamalainen <ccr@tnsp.org>
parents:
457
diff
changeset
|
11 # support SSL/TLS https for URL checking. And you probably do. |
cfbe6acc1d73
urllog: tcl-tls 1.7.x (tested with 1.7.13) is now a requirement. It is
Matti Hamalainen <ccr@tnsp.org>
parents:
457
diff
changeset
|
12 # |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
13 # URL-logger script for EggDrop IRC robot, utilizing SQLite3 database |
81
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
14 # This script requires SQLite TCL extension. Under Debian, you need: |
17e542b7985a
urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
73
diff
changeset
|
15 # tcl8.5 libsqlite3-tcl (and eggdrop eggdrop-data, of course) |
50
f69363fc1f61
Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents:
49
diff
changeset
|
16 # |
479
fc3b6bc37927
urllog: Improve documentation slightly.
Matti Hamalainen <ccr@tnsp.org>
parents:
473
diff
changeset
|
17 # If you are doing a fresh install, you will need to create the initial |
fc3b6bc37927
urllog: Improve documentation slightly.
Matti Hamalainen <ccr@tnsp.org>
parents:
473
diff
changeset
|
18 # database with the required table schemas. You can do that by running |
503
fdd1f0b83685
urllog: Migration from old versions hasn't been supported in reality for a
Matti Hamalainen <ccr@tnsp.org>
parents:
493
diff
changeset
|
19 # "create_urllog_db.tcl". You also need to set up the configuration in |
fdd1f0b83685
urllog: Migration from old versions hasn't been supported in reality for a
Matti Hamalainen <ccr@tnsp.org>
parents:
493
diff
changeset
|
20 # "config.urllog" file. |
0 | 21 # |
22 ########################################################################## | |
13
e06d41fb69d5
Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents:
8
diff
changeset
|
23 |
263
f01d60175c44
urllog: Move configuration to external file.
Matti Hamalainen <ccr@tnsp.org>
parents:
260
diff
changeset
|
### The configuration should be in config.urllog in same directory
### as this script. Or change the line below to point where ever
### you wish. See "config.urllog.example" for an example config file.
source [file join [file dirname [info script]] config.urllog]

### Required utillib.tcl
### (loaded from the same directory as this script)
source [file join [file dirname [info script]] utillib.tcl]
54d34d086b47
urllog: Use the utility lib for entity conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
289
diff
changeset
|
31 |
0 | 32 |
33 ########################################################################## | |
34 # No need to look below this line | |
35 ########################################################################## | |
package require sqlite3
package require http

# Script identification strings.
set urllog_name "URLLog"
set urllog_version "2.6.0"
set urllog_message [format "%s v%s (C) 2000-2021 ccr/TNSP" $urllog_name $urllog_version]


# Flat list of pairs: a literal character followed by its percent-encoded form.
set urllog_httprep [list "@" "%40" "\{" "%7B" "\}" "%7D" "\[" "%5B" "\]" "%5D"]

# Alphabet used by urllog_get_short to encode an id into three characters.
# NOTE(review): 'N' precedes 'M' in this string; kept exactly as-is, since
# changing the order would change every short code derived from it.
set urllog_shorturl_str "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
0 | 46 |
102
5425dc418505
urllog: Entity data is now in UTF-8, but TCL source files are interpreted with current system locale, which may not be UTF-8. We must therefore "convert" the entity mapping string to UTF-8 to be certain of TCL's interpretation of its encoding.
Matti Hamalainen <ccr@tnsp.org>
parents:
101
diff
changeset
|
47 |
### Binding initializations
# One row per binding: bind type, mask, handler procedure.
# All bindings are registered with flags "-".
foreach {urllog_btype urllog_bmask urllog_bproc} {
  pub   !urlfind  urllog_pub_cmd_urlfind
  msg   !urlfind  urllog_msg_cmd_urlfind
  pubm  *.*       urllog_check_line
  topc  *.*       urllog_check_line
} {
  bind $urllog_btype - $urllog_bmask $urllog_bproc
}
# Do not leave the loop variables behind in the global scope.
unset urllog_btype urllog_bmask urllog_bproc
0 | 53 |
54 | |
55 #------------------------------------------------------------------------- | |
56 ### Utility functions | |
# Write a message to the bot log via putlog, prefixed with the script name.
# Nothing is logged when the global urllog_log_enable equals 0.
#   umsg - message text
proc urllog_log {umsg} {
  global urllog_log_enable urllog_name

  # Logging switched off: bail out early.
  if {$urllog_log_enable == 0} {
    return
  }

  putlog "${urllog_name}: $umsg"
}
64 | |
65 | |
# Check whether a string is a non-negative decimal integer.
#   uarg - string to test
# Returns 1 when uarg consists solely of one or more ASCII digits 0-9,
# otherwise 0. The empty string is NOT a number: the previous
# per-character loop never ran for "" and therefore reported 1.
proc urllog_isnumber {uarg} {
  return [regexp -- {^[0-9]+$} $uarg]
}
72 | |
73 | |
# Send a message to the server with putserv, using the command held in the
# global urllog_preferredmsg.
#   apublic - 1 to address the channel, anything else to address the nick
#   anick   - nick used as target when not public
#   achan   - channel used as target when public
#   amsg    - message text
proc urllog_msg {apublic anick achan amsg} {
  global urllog_preferredmsg

  # Pick the recipient first, then emit a single command.
  if {$apublic == 1} {
    set atarget $achan
  } else {
    set atarget $anick
  }

  putserv "$urllog_preferredmsg $atarget :$amsg"
}
83 | |
84 | |
# Send a public (channel) message through urllog_msg, but only when the
# global urllog_verbose is non-zero.
#   anick - nick passed through to urllog_msg
#   achan - channel passed through to urllog_msg
#   amsg  - message text
proc urllog_verb_msg {anick achan amsg} {
  global urllog_verbose

  # Quiet mode: say nothing.
  if {$urllog_verbose == 0} {
    return
  }

  urllog_msg 1 $anick $achan $amsg
}
92 | |
93 | |
116
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
# Normalize an encoding name.
#   uencoding - encoding name, optionally prefixed by a locale such as
#               "fi_FI." (two lowercase letters, underscore, two uppercase
#               letters, dot)
# Steps: strip the locale prefix, lowercase the rest, and turn a leading
# "iso-" into "iso" (e.g. "fi_FI.ISO-8859-15" -> "iso8859-15").
# Returns the normalized name.
proc urllog_sanitize_encoding {uencoding} {
  # The patterns are brace-quoted so that "\." reaches the regexp engine as
  # a literal dot. In a double-quoted Tcl string "\." collapses to ".",
  # which made the prefix match ANY character after the locale.
  regsub -- {^[a-z][a-z]_[A-Z][A-Z]\.} $uencoding "" uencoding
  set uencoding [string tolower $uencoding]
  regsub -- {^iso-} $uencoding "iso" uencoding
  return $uencoding
}
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
100 |
294 | 101 |
0 | 102 #------------------------------------------------------------------------- |
150
52350ed97775
urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents:
136
diff
changeset
|
# Build the short-URL text for a numeric id.
#   utime - integer to encode (callers in this file pass the database row id)
# The id is written as three "digits" in base N, where N is the length of
# the global urllog_shorturl_str and each digit indexes into that string.
# Returns "[ <urllog_shorturl_prefix><3 chars> ]".
# NOTE(review): for ids >= N*N*N the first index is out of range and
# [string index] yields an empty string, so the code has only two characters.
proc urllog_get_short {utime} {
  global urllog_shorturl_prefix urllog_shorturl_str

  set ulen [string length $urllog_shorturl_str]

  # Expressions are braced so the operands are substituted exactly once;
  # unbraced, any command text inside $utime would be evaluated by expr.
  set u1 [expr {$utime / ($ulen * $ulen)}]
  set utmp [expr {$utime % ($ulen * $ulen)}]
  set u2 [expr {$utmp / $ulen}]
  set u3 [expr {$utmp % $ulen}]

  return "\[ $urllog_shorturl_prefix[string index $urllog_shorturl_str $u1][string index $urllog_shorturl_str $u2][string index $urllog_shorturl_str $u3] \]"
}
0 | 115 |
116 | |
117 #------------------------------------------------------------------------- | |
# Shorten a URL for display.
#   url - URL string
# URLs no longer than the global urllog_shorturl_orig are returned as-is;
# longer ones are cut and suffixed with "...".
proc urllog_chop_url {url} {
  global urllog_shorturl_orig

  # Short enough already.
  if {[string length $url] <= $urllog_shorturl_orig} {
    return $url
  }

  # string range is inclusive at both ends, so this keeps
  # urllog_shorturl_orig + 1 characters.
  set uhead [string range $url 0 $urllog_shorturl_orig]
  return "${uhead}..."
}
127 | |
241 | 128 |
0 | 129 #------------------------------------------------------------------------- |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
# Check whether a URL is already stored in the "urls" table and, if it is,
# announce the earlier entry.
#   urlStr  - URL to look up (exact match on the url column)
#   urlNick - nick that said the URL now
#   urlHost - host of that nick (not used in this procedure)
#   urlChan - channel the URL was said on now
# Returns 0 when the URL is already known and 1 when it is not.
# Note the polarity: despite the name, 1 means "does not exist yet".
proc urllog_exists {urlStr urlNick urlHost urlChan} {
  global urldb urlmsg_alreadyknown urllog_shorturl
  global urllog_msg_channels

  # The SQL is brace-quoted so the SQLite Tcl interface binds $urlStr as a
  # parameter instead of having the value pasted into the statement text.
  # Result columns are placed in local variables named after their aliases.
  urldb eval {
    SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser,
           host AS uhost, chan AS uchan, title AS utitle
    FROM urls WHERE url = $urlStr
  } {
    urllog_log "URL said by $urlNick ($urlStr) already known"
    if {$urllog_shorturl != 0} {
      set qstr "[urllog_get_short $uid] "
    } else {
      set qstr ""
    }
    append qstr "($uuser/$uchan@[utl_ctime $utime])"
    if {[string length $utitle] > 0} {
      set qstr "$urlmsg_alreadyknown - '$utitle' $qstr"
    } else {
      set qstr "$urlmsg_alreadyknown $qstr"
    }

    # NOTE(review): the channel tested here is the one stored with the
    # earlier entry ($uchan), while the message goes to the current
    # channel ($urlChan) - confirm this is intended.
    if {[utl_match_delim_list $urllog_msg_channels $uchan]} {
      urllog_verb_msg $urlNick $urlChan $qstr
    }
    # Leave the procedure on the first matching row.
    return 0
  }
  return 1
}
0 | 156 |
18
1e2232135354
More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents:
13
diff
changeset
|
157 |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
158 #------------------------------------------------------------------------- |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
# Insert a new URL into the "urls" table and announce it.
#   urlStr   - URL to store
#   urlNick  - nick that said the URL
#   urlHost  - host of that nick
#   urlChan  - channel the URL was said on
#   urlTitle - document title; an empty string is stored as NULL
# Returns 1 on success, 0 when the INSERT failed (the error is logged).
proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} {
  global urldb urllog_shorturl

  set utime [unixtime]

  # Brace-quoted SQL: the $names are bound by the SQLite Tcl interface as
  # parameters, so no manual quoting/escaping of the values is needed.
  # CAST keeps the timestamp an integer as it was when inserted as a bare
  # literal; NULLIF turns an empty title into NULL.
  set usql {INSERT INTO urls (utime,url,user,host,chan,title) VALUES (CAST($utime AS INTEGER), $urlStr, $urlNick, $urlHost, $urlChan, NULLIF($urlTitle, ''))}
  if {[catch {urldb eval $usql} uerrmsg]} {
    urllog_log "$uerrmsg on SQL:\n$usql"
    return 0
  }
  set uid [urldb last_insert_rowid]
  urllog_log "Added URL ($urlNick@$urlChan): $urlStr"


  ### Let's say something, to confirm that everything went well.
  if {$urllog_shorturl != 0} {
    set qstr "[urllog_get_short $uid] "
  } else {
    set qstr ""
  }
  if {[string length $urlTitle] > 0} {
    urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr"
  } else {
    urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr"
  }

  return 1
}
190 | |
191 | |
192 #------------------------------------------------------------------------- | |
585
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
# Unset the caller's HTTP result variables.
# Every argument is the NAME of a variable in the caller's scope:
#   urlStatus, urlSCode, urlCode, urlData - scalar variables
#   urlMeta                               - array variable
# Safe to call when some or all of them do not exist, for example after
# urllog_do_request returned 0 before assigning them; a plain "unset"
# would raise an error in that case.
proc urllog_clear_request { urlStatus urlSCode urlCode urlData urlMeta } {
  upvar $urlStatus ustatus
  upvar $urlSCode uscode
  upvar $urlCode ucode
  upvar $urlData udata
  upvar $urlMeta umeta

  unset -nocomplain ustatus uscode ucode udata
  array unset umeta
}
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
206 |
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
207 |
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
208 #------------------------------------------------------------------------- |
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
# Perform one HTTP request and hand the results back through the caller's
# variables.
#   urlNick, urlChan - passed to urllog_verb_msg for error notices
#   urlStr           - URL to fetch
#   urlStatus        - NAME of variable receiving ::http::status
#   urlSCode         - NAME of variable receiving ::http::code
#   urlCode          - NAME of variable receiving ::http::ncode
#   urlData          - NAME of variable receiving ::http::data
#   urlMeta          - NAME of array receiving the ::http::meta pairs
# Returns 1 on success. Returns 0 when the request could not be started,
# timed out, or ended with a status other than "ok"; in those cases only
# the status variable may have been assigned.
proc urllog_do_request { urlNick urlChan urlStr urlStatus urlSCode urlCode urlData urlMeta } {
  global urlmsg_ioerror urlmsg_timeout urlmsg_errorgettingdoc

  upvar $urlStatus ustatus
  upvar $urlSCode uscode
  upvar $urlCode ucode
  upvar $urlData udata
  upvar $urlMeta umeta

  set urlHeaders {}
  lappend urlHeaders "Accept-Encoding" "identity"
#  lappend urlHeaders "Connection" "keep-alive"

  if {[catch {set utoken [::http::geturl $urlStr -timeout 6000 -binary 1 -headers $urlHeaders]} uerrmsg]} {
    # No token was created, so there is nothing to clean up here.
    urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)"
    urllog_log "HTTP request failed: $uerrmsg"
    return 0
  }

  set ustatus [::http::status $utoken]
  if {$ustatus == "timeout"} {
    # Release the token before leaving; it was leaked on this path before.
    ::http::cleanup $utoken
    urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout"
    urllog_log "HTTP request timed out ($urlStr)"
    return 0
  }

  if {$ustatus != "ok"} {
    # Fetch the error text first, then release the token.
    set uerror [::http::error $utoken]
    ::http::cleanup $utoken
    urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($uerror)"
    urllog_log "Error in HTTP transaction: $uerror ($urlStr)"
    return 0
  }

  # ustatus already holds the final status ("ok") at this point.
  set uscode [::http::code $utoken]
  set ucode [::http::ncode $utoken]
  set udata [::http::data $utoken]
  array set umeta [::http::meta $utoken]
  ::http::cleanup $utoken

  # Sanitize the metadata KEYS
  # A lowercase copy of every key is added; the original keys stay in place.
  foreach {ukey uvalue} [array get umeta] {
    set ukey [string tolower $ukey]
    set umeta($ukey) $uvalue
  }

  return 1
}
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
256 |
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
257 #------------------------------------------------------------------------- |
327
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
258 proc urllog_validate_url { urlNick urlChan urlMStr urlMProto urlMHostName } { |
579
fec0911e9ef9
urllog: Remove TLD checks, as they are rather useless nowadays.
Matti Hamalainen <ccr@tnsp.org>
parents:
578
diff
changeset
|
259 global urlmsg_nosuchhost urllog_httprep |
fec0911e9ef9
urllog: Remove TLD checks, as they are rather useless nowadays.
Matti Hamalainen <ccr@tnsp.org>
parents:
578
diff
changeset
|
260 global urllog_shorturl_prefix urllog_shorturl |
585
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
261 upvar $urlMStr urlStr |
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
262 upvar $urlMProto urlProto |
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
263 upvar $urlMHostName urlHostName |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
264 |
571
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
265 ### Hack for removing parenthesis around an URL |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
266 if {[regexp {^\((.+)\)$} $urlStr urlMatch urlClean]} { |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
267 set urlStr $urlClean |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
268 } |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
269 |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
270 ### Clean excess stuff, if any, and attempt to |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
271 ### guess the URL protocol component if it is missing |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
272 if {[regexp "(\[a-z\]+)://\[^ \]+" $urlStr urlMatch urlProto]} { |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
273 set urlStr $urlMatch |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
274 } elseif {[regexp "www\.\[^ \]+" $urlStr urlMatch]} { |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
275 set urlStr "http://$urlMatch" |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
276 } elseif {[regexp "ftp\.\[^ \]+" $urlStr urlMatch]} { |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
277 set urlStr "ftp://$urlMatch" |
28 | 278 } |
0 | 279 |
95
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
280 ### Handle URLs that have an IPv4-address |
327
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
281 if {[regexp "(\[a-z\]+)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr urlMatch urlProto ni1 ni2 ni3 ni4]} { |
28 | 282 # Check if the IP is on local network |
572
295c225e3152
urllog: Improve invalid/local IPv4 network check
Matti Hamalainen <ccr@tnsp.org>
parents:
571
diff
changeset
|
283 if {$ni1 == 127 || $ni1 == 10 || ($ni1 == 192 && $ni2 == 168)} { |
295c225e3152
urllog: Improve invalid/local IPv4 network check
Matti Hamalainen <ccr@tnsp.org>
parents:
571
diff
changeset
|
284 urllog_log "URL pointing to local network, ignored ($urlStr)." |
28 | 285 return 0 |
286 } | |
572
295c225e3152
urllog: Improve invalid/local IPv4 network check
Matti Hamalainen <ccr@tnsp.org>
parents:
571
diff
changeset
|
287 if {$ni1 == 0 || $ni1 >= 255 || $ni2 >= 255 || $ni3 >= 255 || $ni4 >= 255} { |
571
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
288 urllog_log "URL pointing to invalid network, ignored ($urlStr)." |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
289 return 0 |
d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
Matti Hamalainen <ccr@tnsp.org>
parents:
570
diff
changeset
|
290 } |
28 | 291 } |
0 | 292 |
96
e5a6c27be365
urllog: Comments and cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents:
95
diff
changeset
|
293 ### Check now if we have a ShortURL here ... |
150
52350ed97775
urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents:
136
diff
changeset
|
294 if {[string match "$urllog_shorturl_prefix*" $urlStr]} { |
98
fbbe7ee40e2f
urllog: Improve one informational / error message.
Matti Hamalainen <ccr@tnsp.org>
parents:
97
diff
changeset
|
295 urllog_log "Ignoring ShortURL from $urlNick: $urlStr" |
252
eb2fce89b8ab
urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents:
251
diff
changeset
|
296 # set uud "" |
eb2fce89b8ab
urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents:
251
diff
changeset
|
297 # set usql "SELECT id AS uid, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE utime=$uud" |
eb2fce89b8ab
urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents:
251
diff
changeset
|
298 # urldb eval $usql { |
eb2fce89b8ab
urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents:
251
diff
changeset
|
299 # urllog_verb_msg $urlNick $urlChan "'$utitle' - $uurl" |
eb2fce89b8ab
urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents:
251
diff
changeset
|
300 # return 1 |
eb2fce89b8ab
urllog: Comment out some currently unused code.
Matti Hamalainen <ccr@tnsp.org>
parents:
251
diff
changeset
|
301 # } |
28 | 302 return 0 |
303 } | |
0 | 304 |
95
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
305 ### Get URL protocol component |
251
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
306 set urlProto "" |
464
506977ea9d0c
urllog: Improve URL validation.
Matti Hamalainen <ccr@tnsp.org>
parents:
458
diff
changeset
|
307 if {[regexp "(\[a-z\]+)://" $urlStr urlMatch urlProto]} { |
506977ea9d0c
urllog: Improve URL validation.
Matti Hamalainen <ccr@tnsp.org>
parents:
458
diff
changeset
|
308 ### Is it a http or ftp url? |
506977ea9d0c
urllog: Improve URL validation.
Matti Hamalainen <ccr@tnsp.org>
parents:
458
diff
changeset
|
309 if {$urlProto != "http" && $urlProto != "https" && $urlProto != "ftp"} { |
506977ea9d0c
urllog: Improve URL validation.
Matti Hamalainen <ccr@tnsp.org>
parents:
458
diff
changeset
|
310 urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED protocol class ($urlProto)." |
506977ea9d0c
urllog: Improve URL validation.
Matti Hamalainen <ccr@tnsp.org>
parents:
458
diff
changeset
|
311 return 0 |
506977ea9d0c
urllog: Improve URL validation.
Matti Hamalainen <ccr@tnsp.org>
parents:
458
diff
changeset
|
312 } |
506977ea9d0c
urllog: Improve URL validation.
Matti Hamalainen <ccr@tnsp.org>
parents:
458
diff
changeset
|
313 } else { |
506977ea9d0c
urllog: Improve URL validation.
Matti Hamalainen <ccr@tnsp.org>
parents:
458
diff
changeset
|
314 urllog_log "Broken URL from $urlNick: ($urlStr), no protocol specifier." |
506977ea9d0c
urllog: Improve URL validation.
Matti Hamalainen <ccr@tnsp.org>
parents:
458
diff
changeset
|
315 return 0 |
506977ea9d0c
urllog: Improve URL validation.
Matti Hamalainen <ccr@tnsp.org>
parents:
458
diff
changeset
|
316 } |
95
687bdd74dfac
urllog: Check if TLS support is enabled when checking if we can fetch title information via HTTP or SSL/HTTP.
Matti Hamalainen <ccr@tnsp.org>
parents:
93
diff
changeset
|
317 |
28 | 318 ### Check the PORT (if the ":" is there) |
327
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
319 set urlRecord [split $urlStr "/"] |
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
320 set urlHostName [lindex $urlRecord 2] |
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
321 set urlPort [lindex [split $urlHostName ":"] end] |
0 | 322 |
327
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
323 if {![urllog_isnumber $urlPort] && $urlPort != "" && $urlPort != $urlHostName} { |
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
324 urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $urlPort" |
28 | 325 return 0 |
326 } | |
0 | 327 |
328 | |
28 | 329 set urlStr [string map $urllog_httprep $urlStr] |
251
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
330 return 1 |
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
331 } |
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
332 |
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
333 |
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
334 #------------------------------------------------------------------------- |
585
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
# urllog_handle_redirect --
#
#   If the previous HTTP response was a redirect, resolve its Location
#   header into an absolute URL, validate it and re-issue the request.
#
# Arguments:
#   urlNick, urlChan - passed through to urllog_verb_msg, urllog_validate_url
#                      and urllog_do_request.
#   urlHost          - not used by this proc; kept so callers need no change.
#   urlRedirLevel    - redirect depth; only used in log messages here.
#   urlProto, urlHostName, urlStr,
#   urlStatus, urlSCode, urlCode, urlData
#                    - NAMES of scalar variables in the caller's scope.
#   urlMeta          - NAME of the caller's array of HTTP response headers;
#                      the "location" key is read from it.
#
# Results:
#   Returns 1 when the response was not a redirect, or when the redirect
#   target was validated and fetched successfully (the caller's variables
#   then describe the new response and urlStr holds the new URL).
#   Returns 0 when the Location header is missing, the new URL fails
#   validation, or the follow-up request fails.
#
proc urllog_handle_redirect {urlNick urlHost urlChan urlRedirLevel urlProto urlHostName urlStr urlStatus urlSCode urlCode urlData urlMeta} {
  # The error message template is a global; without this declaration both
  # error paths below would fail with "no such variable".
  global urlmsg_errorgettingdoc

  # NOTE: the local alias names below are part of the contract with the
  # sibling procs, which receive them BY NAME (see the calls to
  # urllog_validate_url, urllog_clear_request and urllog_do_request).
  # Do not rename them.
  upvar $urlProto uproto
  upvar $urlHostName uhostname
  upvar $urlStr ustr
  upvar $urlStatus ustatus
  upvar $urlSCode uscode
  upvar $urlCode ucode
  upvar $urlData udata
  upvar $urlMeta umeta

  # Anything that is not a Location-based redirect is left to the caller.
  # 307/308 are followed the same way as 301-303.
  if {!(($ucode >= 301 && $ucode <= 303) || $ucode == 307 || $ucode == 308)} {
    return 1
  }

  if {![info exists umeta(location)]} {
    urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc (invalid redirect without Location header)"
    urllog_log "Error fetching document: status=$ustatus, code=$ucode, scode=$uscode, url=$ustr : Invalid redirect without Location header (redirLevel=${urlRedirLevel})"
    return 0
  }

  set nustr $umeta(location)

  # Only treat the target as absolute when it STARTS with a scheme; an
  # unanchored match would also hit URLs embedded in a query string,
  # e.g. "/login?next=http://other/".
  if {![regexp {^[a-z]+://} $nustr]} {
    if {[string range $nustr 0 1] == "//"} {
      # Protocol-relative reference: reuse the current scheme only.
      set nustr "${uproto}:${nustr}"
    } else {
      # Relative reference: make sure the path begins with a slash before
      # gluing it to the current host. (Resolved against the host root,
      # not against the directory of the current URL.)
      if {[string index $nustr 0] != "/"} {
        set nustr "/${nustr}"
      }
      set nustr "${uproto}://${uhostname}${nustr}"
    }
  }

  urllog_log "Redirection #${urlRedirLevel}: $ustr -> $nustr"
  set ustr $nustr

  # Validation receives variable names and may rewrite URL/proto/hostname.
  if {![urllog_validate_url $urlNick $urlChan ustr uproto uhostname]} {
    return 0
  }

  # Reset the previous response state, then fetch the redirect target.
  urllog_clear_request ustatus uscode ucode udata umeta
  if {![urllog_do_request $urlNick $urlChan $ustr ustatus uscode ucode udata umeta]} {
    urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($uscode)"
    return 0
  }

  return 1
}
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
377 |
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
378 |
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
379 #------------------------------------------------------------------------- |
251
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
380 proc urllog_check_url {urlStr urlNick urlHost urlChan} { |
299
1ff281e821a3
urllog: Make rasiatube hack configurable.
Matti Hamalainen <ccr@tnsp.org>
parents:
298
diff
changeset
|
381 global urllog_encoding http_tls_support urlmsg_errorgettingdoc |
581
148c7553c50f
urllog: Remove rasiatube hack setting, it's not used.
Matti Hamalainen <ccr@tnsp.org>
parents:
579
diff
changeset
|
382 global urllog_extra_checks urllog_extra_strict |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
383 |
91
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
384 ### Does the URL already exist? |
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
385 if {![urllog_exists $urlStr $urlNick $urlHost $urlChan]} { |
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
386 return 1 |
6f4bfd8e9447
urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents:
90
diff
changeset
|
387 } |
424
825cac46b1cb
Cosmetic / stray trailing whitespace cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents:
422
diff
changeset
|
388 |
251
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
389 ### Validate URL components, etc. |
327
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
390 set urlProto "" |
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
391 set urlHostName "" |
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
392 if {![urllog_validate_url $urlNick $urlChan urlStr urlProto urlHostName]} { |
251
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
393 return 1 |
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
394 } |
0 | 395 |
267
da239a953e24
urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents:
264
diff
changeset
|
396 ### Do we perform additional checks? |
327
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
397 if {$urllog_extra_checks == 0 || !(($http_tls_support != 0 && $urlProto == "https") || $urlProto == "http")} { |
230 | 398 # No optional checks, or it's not http/https. |
306 | 399 if {$urllog_extra_strict == 0} { |
304
f1589fe20732
urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents:
302
diff
changeset
|
400 # Strict checking disabled, so add the URL, if it does not exist already. |
f1589fe20732
urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents:
302
diff
changeset
|
401 urllog_addurl $urlStr $urlNick $urlHost $urlChan "" |
f1589fe20732
urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents:
302
diff
changeset
|
402 return 1 |
327
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
403 } elseif {$http_tls_support == 0 && $urlProto == "https"} { |
304
f1589fe20732
urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents:
302
diff
changeset
|
404 # Strict ENABLED: If TLS support is disabled and we have https, do nothing |
f1589fe20732
urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents:
302
diff
changeset
|
405 return 1 |
327
a5282cdc56e6
urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents:
319
diff
changeset
|
406 } elseif {$urlProto != "http" && $urlProto != "https"} { |
304
f1589fe20732
urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents:
302
diff
changeset
|
407 # Strict ENABLED: It's not http, or https |
f1589fe20732
urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents:
302
diff
changeset
|
408 return 1 |
f1589fe20732
urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents:
302
diff
changeset
|
409 } |
28 | 410 } |
7
50b52294e93e
urllog: Strip ‏ entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents:
4
diff
changeset
|
411 |
28 | 412 ### Does the document pointed by the URL exist? |
585
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
413 if {![urllog_do_request $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} { |
251
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
414 return 1 |
28 | 415 } |
0 | 416 |
585
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
417 ### Handle redirects of 2 levels |
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
418 if {![urllog_handle_redirect $urlNick $urlHost $urlChan 1 urlProto urlHostName urlStr ustatus uscode ucode udata umeta]} { |
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
419 return 1 |
28 | 420 } |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
421 |
585
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
422 if {![urllog_handle_redirect $urlNick $urlHost $urlChan 2 urlProto urlHostName urlStr ustatus uscode ucode udata umeta]} { |
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
423 return 1 |
251
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
424 } |
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
425 |
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
426 # Final document |
e59f0c3ea0f4
urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents:
250
diff
changeset
|
427 if {$ucode >= 200 && $ucode <= 205} { |
116
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
428 set uenc_doc "" |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
429 set uenc_http "" |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
430 set uencoding "" |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
431 |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
432 # Get information about specified character encodings |
584
9b64f201b3a7
urllog: Use lowercase HTTP metadata keys and sanitize them.
Matti Hamalainen <ccr@tnsp.org>
parents:
583
diff
changeset
|
433 if {[info exists umeta(Content-Type)] && [regexp -nocase {charset\s*=\s*([a-z0-9._-]+)} $umeta(content-type) umatches uenc_http]} { |
116
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
434 # Found character set encoding information in HTTP headers |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
435 } |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
436 |
150
52350ed97775
urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents:
136
diff
changeset
|
437 if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/\?>" $udata umatches uenc_doc]} { |
116
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
438 # Found old style HTML meta tag with character set information |
150
52350ed97775
urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents:
136
diff
changeset
|
439 } elseif {[regexp -nocase -- "<meta.\*\?charset=\"(\[^\"\]*)\".\*\?/\?>" $udata umatches uenc_doc]} { |
116
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
440 # Found HTML5 style meta tag with character set information |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
441 } |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
442 |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
443 # Make sanitized versions of the encoding strings |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
444 set uenc_http2 [urllog_sanitize_encoding $uenc_http] |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
445 set uenc_doc2 [urllog_sanitize_encoding $uenc_doc] |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
446 |
311 | 447 # Check if the document has specified encoding |
116
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
448 # KLUDGE! |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
449 set uencoding $uenc_http2 |
318
5d886e2137d5
urllog: Fix character set conversion a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
315
diff
changeset
|
450 if {$uencoding == "" && $uenc_doc2 != ""} { |
5d886e2137d5
urllog: Fix character set conversion a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
315
diff
changeset
|
451 set uencoding $uenc_doc2 |
5d886e2137d5
urllog: Fix character set conversion a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
315
diff
changeset
|
452 } elseif {$uencoding == ""} { |
424
825cac46b1cb
Cosmetic / stray trailing whitespace cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents:
422
diff
changeset
|
453 # If _NO_ known encoding of any kind, assume the default of iso8859-1 |
86
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
454 set uencoding "iso8859-1" |
4c2b6482c08c
urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
84
diff
changeset
|
455 } |
0 | 456 |
311 | 457 urllog_log "Charsets: http='$uenc_http', doc='$uenc_doc' / sanitized http='$uenc_http2', doc='$uenc_doc2' -> '$uencoding'" |
458 | |
116
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
459 # Get the document title, if any |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
460 set urlTitle "" |
470
2faf2eb18f26
urllog: Add support for Twitter meta titles.
Matti Hamalainen <ccr@tnsp.org>
parents:
469
diff
changeset
|
461 |
473
d155cec91375
urllog: Fixes to Twitter title matching.
Matti Hamalainen <ccr@tnsp.org>
parents:
472
diff
changeset
|
462 if {[regexp -nocase -- "<meta name=\"twitter:title\" content=\"(.\*\?)\"\\s\*\/\?>" $udata umatches urlTitle]} { |
470
2faf2eb18f26
urllog: Add support for Twitter meta titles.
Matti Hamalainen <ccr@tnsp.org>
parents:
469
diff
changeset
|
463 # ... |
2faf2eb18f26
urllog: Add support for Twitter meta titles.
Matti Hamalainen <ccr@tnsp.org>
parents:
469
diff
changeset
|
464 } elseif {[regexp -nocase -- "<title.\*\?>(.\*\?)</title>" $udata umatches urlTitle]} { |
2faf2eb18f26
urllog: Add support for Twitter meta titles.
Matti Hamalainen <ccr@tnsp.org>
parents:
469
diff
changeset
|
465 # ... |
2faf2eb18f26
urllog: Add support for Twitter meta titles.
Matti Hamalainen <ccr@tnsp.org>
parents:
469
diff
changeset
|
466 } |
313
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
467 |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
468 # If facebook, get meta info |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
469 if {[regexp -nocase -- "(http|https):\/\/www.facebook.com" $urlStr]} { |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
470 if {[regexp -nocase -- "<meta name=\"description\" content=\"(.\*\?)\"" $udata umatches urlTmp]} { |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
471 if {$urlTitle != ""} { append urlTitle " :: " } |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
472 append urlTitle $urlTmp |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
473 } |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
474 } |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
475 |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
476 # If character set conversion is required, do it now |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
477 if {$urlTitle != "" && $uencoding != ""} { |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
478 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} { |
8175ef52889b
urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents:
312
diff
changeset
|
479 urllog_log "Error in charset conversion: $cerrmsg" |
28 | 480 } |
150
52350ed97775
urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents:
136
diff
changeset
|
481 |
116
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
482 # Convert some HTML entities to plaintext and do some cleanup |
291
54d34d086b47
urllog: Use the utility lib for entity conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
289
diff
changeset
|
483 set utmp [utl_convert_html_ent $urlTitle] |
116
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
484 regsub -all "\r|\n|\t" $utmp " " utmp |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
485 regsub -all " *" $utmp " " utmp |
4f3edcf72987
urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents:
115
diff
changeset
|
486 set urlTitle [string trim $utmp] |
28 | 487 } |
3
8003090caa35
Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents:
0
diff
changeset
|
488 |
83
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
489 # Check if the URL already exists, just in case we had some redirects |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
490 if {[urllog_exists $urlStr $urlNick $urlHost $urlChan]} { |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
491 urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle |
f171a9fb7b7b
urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents:
82
diff
changeset
|
492 } |
28 | 493 return 1 |
494 } else { | |
585
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
495 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($uscode)" |
224
aaf433ab696a
urllog: Improve error messages a bit.
Matti Hamalainen <ccr@tnsp.org>
parents:
223
diff
changeset
|
496 urllog_log "Error fetching document: status=$ustatus, code=$ucode, scode=$uscode, url=$urlStr" |
585
a5dc31f5b44e
urllog: Clean up redirection handling and improve error checking and handling.
Matti Hamalainen <ccr@tnsp.org>
parents:
584
diff
changeset
|
497 return 0 |
28 | 498 } |
0 | 499 } |
500 | |
501 | |
502 #------------------------------------------------------------------------- | |
219
4e09bcc48851
urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents:
218
diff
changeset
|
503 |
4e09bcc48851
urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents:
218
diff
changeset
|
504 |
#
# urllog_check_line -- scan one line of channel text for URL-like tokens.
#
# Arguments:
#   unick  - nick of the speaker; the wildcard "*" is rejected and logged
#   uhost  - host of the speaker, passed through to urllog_check_url
#   uhand  - handle of the speaker (not used in this proc)
#   uchan  - channel the text was seen on
#   utext  - the text to scan
#
# The text is only examined when uchan matches the urllog_log_channels
# setting.  Every space-separated token that looks like a URL is handed,
# unmodified, to urllog_check_url.
#
# Returns: always 0.
#
proc urllog_check_line {unick uhost uhand uchan utext} {
  global urllog_log_channels

  ### Check the nick
  if {$unick == "*"} {
    urllog_log "urllog_check_line: Nick was wc, this should not happen."
    return 0
  }

  ### Check the channel
  if {[utl_match_delim_list $urllog_log_channels $uchan]} {
    ### Do the URL checking
    foreach str [split $utext " "] {
      # The pattern is brace-quoted on purpose: inside double quotes Tcl
      # turns "\." into a bare ".", which made the www/ftp alternative
      # accept any character after the prefix (e.g. "wwwhatever").
      # A token qualifies when it contains "scheme://something" anywhere,
      # or when it starts with a literal "www." or "ftp." prefix.
      if {[regexp -- {([a-z]+://[^[:space:]]+|^(www|ftp)\.[^[:space:]]+)} $str]} {
        # The whole token (not just the matched part) is passed on.
        urllog_check_url $str $unick $uhost $uchan
      }
    }
  }

  return 0
}
526 | |
527 | |
528 #------------------------------------------------------------------------- | |
529 ### Parse arguments, find and show the results | |
#
# urllog_find -- parse a search expression, query the URL database and
# send the matching entries to the requester.
#
# Arguments:
#   unick   - nick of the requester (target for the reply)
#   uhand   - handle of the requester (only used in the log line)
#   uchan   - channel of the request, empty for private queries
#   utext   - search expression: space-separated tokens, each optionally
#             prefixed with
#               -  exclude entries whose URL or title contains the text
#               %  match against the user who posted the URL
#               @  accepted but currently ignored
#               +  URL or title must contain the text
#             an unprefixed token behaves like "+"
#   upublic - 0 for a private query, anything else for a public one
#
# The maximum number of results comes from urllog_showmax_priv (private)
# or urllog_showmax_pub (public) when that setting holds a positive
# integer; otherwise the historical defaults of 5 and 3 are used.
#
# Returns: always 0.
#
proc urllog_find {unick uhand uchan utext upublic} {
  global urllog_shorturl urldb
  global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch

  ### Decide how many results to show
  if {$upublic == 0} {
    set ulimit 5
    set ulimitvar urllog_showmax_priv
  } else {
    set ulimit 3
    set ulimitvar urllog_showmax_pub
  }

  # Use the configured limit only if it is a sane positive integer; the
  # value ends up verbatim in the SQL text below.
  if {[info exists $ulimitvar]} {
    set ucfglimit [set $ulimitvar]
    if {[string is integer -strict $ucfglimit] && $ucfglimit > 0} {
      set ulimit $ucfglimit
    }
  }

  ### Parse the given command
  urllog_log "$unick/$uhand searched URL: $utext"

  set fpatlist {}
  foreach ftoken [split $utext " "] {
    # Consecutive spaces produce empty tokens, which carry no criteria
    if {$ftoken eq ""} {
      continue
    }

    set fprefix [string range $ftoken 0 0]
    set fpattern [string range $ftoken 1 end]
    # NOTE(review): quoting relies entirely on utl_escape making the text
    # safe inside a single-quoted SQL literal -- confirm in the utility lib.
    set qpattern "'%[utl_escape $fpattern]%'"

    if {$fprefix == "-"} {
      # Exclusion: neither the URL nor the title may contain the text.
      # IFNULL keeps entries without a title from being dropped by the
      # NULL result of the comparison.
      lappend fpatlist "(url NOT LIKE $qpattern AND IFNULL(title, '') NOT LIKE $qpattern)"
    } elseif {$fprefix == "%"} {
      lappend fpatlist "user LIKE $qpattern"
    } elseif {$fprefix == "@"} {
      # Prefix is recognized, but no criteria is generated for it
    } elseif {$fprefix == "+"} {
      lappend fpatlist "(url LIKE $qpattern OR title LIKE $qpattern)"
    } else {
      # No known prefix: the whole token is the search text
      set qpattern "'%[utl_escape $ftoken]%'"
      lappend fpatlist "(url LIKE $qpattern OR title LIKE $qpattern)"
    }
  }

  ### Build the query, all criteria must hold at the same time
  if {[llength $fpatlist] > 0} {
    set fquery "WHERE [join $fpatlist " AND "]"
  } else {
    set fquery ""
  }

  set iresults 0
  set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls $fquery ORDER BY utime DESC LIMIT $ulimit"
  urldb eval $usql {
    incr iresults
    set shortURL $uurl
    if {$urllog_shorturl != 0 && $uid != ""} {
      set shortURL "$shortURL [urllog_get_short $uid]"
    }
    urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[utl_ctime $utime])"
  }

  if {$iresults == 0} {
    # If no URLs were found
    urllog_msg $upublic $unick $uchan $urlmsg_nomatch
  }

  return 0
}
588 | |
589 | |
590 #------------------------------------------------------------------------- | |
591 ### Bound handler functions for URL searching | |
531
137d61a0b5b7
urllog: Rename two functions.
Matti Hamalainen <ccr@tnsp.org>
parents:
503
diff
changeset
|
#
# urllog_pub_cmd_urlfind -- handler for the public (channel) search command.
#
# Runs the search as a public query when uchan matches the
# urllog_search_channels setting and returns whatever urllog_find
# returns; on any other channel it does nothing and returns 0.
# uhost is accepted but not used here.
#
proc urllog_pub_cmd_urlfind {unick uhost uhand uchan utext} {
  global urllog_search_channels

  # Searching is not enabled for this channel: stay silent
  if {![utl_match_delim_list $urllog_search_channels $uchan]} {
    return 0
  }

  # The trailing 1 marks the query as public
  return [urllog_find $unick $uhand $uchan $utext 1]
}
600 | |
601 | |
531
137d61a0b5b7
urllog: Rename two functions.
Matti Hamalainen <ccr@tnsp.org>
parents:
503
diff
changeset
|
#
# urllog_msg_cmd_urlfind -- handler for the private message search command.
#
# Runs the search as a private query (empty channel, public flag 0).
# The result of urllog_find is discarded; this proc always returns 0.
# uhost is accepted but not used here.
#
proc urllog_msg_cmd_urlfind {unick uhost uhand utext} {
  # Private queries have no channel and are flagged as non-public
  urllog_find $unick $uhand {} $utext 0

  return 0
}
0 | 606 |
560 | 607 |
608 #------------------------------------------------------------------------- | |
609 # Script initialization | |
610 #------------------------------------------------------------------------- | |
### Announce the script in the bot log
putlog $urllog_message


### Report which optional features are switched on
if {0 != $urllog_extra_checks} {
  putlog { - Additional URL validity checks enabled.}
}

if {0 != $urllog_verbose} {
  putlog { - Verbose mode enabled.}
}


### HTTP module setup
# Without a non-empty http_user_agent setting, identify as script name/version
if {![info exists http_user_agent] || $http_user_agent == ""} {
  ::http::config -useragent "$urllog_name/$urllog_version"
} else {
  ::http::config -useragent $http_user_agent
}

# Optional HTTP proxy
if {[info exists http_use_proxy] && 0 != $http_use_proxy} {
  ::http::config \
    -proxyhost $http_proxy_host \
    -proxyport $http_proxy_port
  putlog " - Using proxy ${http_proxy_host}:${http_proxy_port}"
}

# Optional https support: register a TLS socket command for port 443.
# SSLv2/SSLv3 are switched off, TLS 1.0 - 1.2 are switched on, and the
# CA certificates are looked up from http_tls_cadir.
if {[info exists http_tls_support] && 0 != $http_tls_support} {
  package require tls
  ::http::register https 443 [list ::tls::socket \
    -request true -require true \
    -ssl2 false -ssl3 false \
    -tls1 true -tls1.1 true -tls1.2 true \
    -cadir $http_tls_cadir \
    -autoservername true]
  putlog { - TLS/SSL support enabled.}
}


### SQLite database setup
# Opens the database as the "urldb" command; on failure the error is
# logged and the process exits with status 2.
if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} {
  putlog "Could not open SQLite3 database '${urllog_db_file}': ${uerrmsg}"
  exit 2
}
649 | |
650 | |
0 | 651 # end of script |