annotate urllog.tcl @ 584:9b64f201b3a7

urllog: Use lowercase HTTP metadata keys and sanitize them.
author Matti Hamalainen <ccr@tnsp.org>
date Sun, 24 Jan 2021 21:47:27 +0200
parents d5fc3ecee4c7
children a5dc31f5b44e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 ##########################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
2 #
573
d004944d6afd urllog: Bump version.
Matti Hamalainen <ccr@tnsp.org>
parents: 572
diff changeset
3 # URLLog v2.5.1 by Matti 'ccr' Hamalainen <ccr@tnsp.org>
578
14dfb925a64a Bump copyright years.
Matti Hamalainen <ccr@tnsp.org>
parents: 573
diff changeset
4 # (C) Copyright 2000-2021 Tecnic Software productions (TNSP)
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
5 #
113
077c7383f36f urllog: Add line about the script's license.
Matti Hamalainen <ccr@tnsp.org>
parents: 112
diff changeset
6 # This script is freely distributable under GNU GPL (version 2) license.
077c7383f36f urllog: Add line about the script's license.
Matti Hamalainen <ccr@tnsp.org>
parents: 112
diff changeset
7 #
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
8 ##########################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
9 #
458
cfbe6acc1d73 urllog: tcl-tls 1.7.x (tested with 1.7.13) is now a requirement. It is
Matti Hamalainen <ccr@tnsp.org>
parents: 457
diff changeset
10 # NOTICE! NOTICE! This script REQUIRES tcl-tls 1.7.13+ if you wish to
cfbe6acc1d73 urllog: tcl-tls 1.7.x (tested with 1.7.13) is now a requirement. It is
Matti Hamalainen <ccr@tnsp.org>
parents: 457
diff changeset
11 # support SSL/TLS https for URL checking. And you probably do.
cfbe6acc1d73 urllog: tcl-tls 1.7.x (tested with 1.7.13) is now a requirement. It is
Matti Hamalainen <ccr@tnsp.org>
parents: 457
diff changeset
12 #
50
f69363fc1f61 Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 49
diff changeset
13 # URL-logger script for EggDrop IRC robot, utilizing SQLite3 database
81
17e542b7985a urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 73
diff changeset
14 # This script requires SQLite TCL extension. Under Debian, you need:
17e542b7985a urllog, quotedb: Improve documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 73
diff changeset
15 # tcl8.5 libsqlite3-tcl (and eggdrop eggdrop-data, of course)
50
f69363fc1f61 Update some comments and add a bit of documentation.
Matti Hamalainen <ccr@tnsp.org>
parents: 49
diff changeset
16 #
479
fc3b6bc37927 urllog: Improve documentation slightly.
Matti Hamalainen <ccr@tnsp.org>
parents: 473
diff changeset
17 # If you are doing a fresh install, you will need to create the initial
fc3b6bc37927 urllog: Improve documentation slightly.
Matti Hamalainen <ccr@tnsp.org>
parents: 473
diff changeset
18 # database with the required table schemas. You can do that by running
503
fdd1f0b83685 urllog: Migration from old versions hasn't been supported in reality for a
Matti Hamalainen <ccr@tnsp.org>
parents: 493
diff changeset
19 # "create_urllog_db.tcl". You also need to set up the configuration in
fdd1f0b83685 urllog: Migration from old versions hasn't been supported in reality for a
Matti Hamalainen <ccr@tnsp.org>
parents: 493
diff changeset
20 # "config.urllog" file.
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
21 #
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
22 ##########################################################################
13
e06d41fb69d5 Begin work on converting urllog.tcl to use an SQLite3 database instead of flat file.
Matti Hamalainen <ccr@tnsp.org>
parents: 8
diff changeset
23
263
f01d60175c44 urllog: Move configuration to external file.
Matti Hamalainen <ccr@tnsp.org>
parents: 260
diff changeset
24 ### The configuration should be in config.urllog in same directory
f01d60175c44 urllog: Move configuration to external file.
Matti Hamalainen <ccr@tnsp.org>
parents: 260
diff changeset
25 ### as this script. Or change the line below to point wherever
f01d60175c44 urllog: Move configuration to external file.
Matti Hamalainen <ccr@tnsp.org>
parents: 260
diff changeset
26 ### you wish. See "config.urllog.example" for an example config file.
f01d60175c44 urllog: Move configuration to external file.
Matti Hamalainen <ccr@tnsp.org>
parents: 260
diff changeset
27 source [file dirname [info script]]/config.urllog
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
28
291
54d34d086b47 urllog: Use the utility lib for entity conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 289
diff changeset
29 ### Required utillib.tcl
54d34d086b47 urllog: Use the utility lib for entity conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 289
diff changeset
30 source [file dirname [info script]]/utillib.tcl
54d34d086b47 urllog: Use the utility lib for entity conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 289
diff changeset
31
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
32
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
33 ##########################################################################
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
34 # No need to look below this line
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
35 ##########################################################################
560
17183d85ab62 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 545
diff changeset
36 package require sqlite3
17183d85ab62 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 545
diff changeset
37 package require http
17183d85ab62 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 545
diff changeset
38
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
39 set urllog_name "URLLog"
573
d004944d6afd urllog: Bump version.
Matti Hamalainen <ccr@tnsp.org>
parents: 572
diff changeset
40 set urllog_version "2.5.1"
578
14dfb925a64a Bump copyright years.
Matti Hamalainen <ccr@tnsp.org>
parents: 573
diff changeset
41 set urllog_message "$urllog_name v$urllog_version (C) 2000-2021 ccr/TNSP"
560
17183d85ab62 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 545
diff changeset
42
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
43
424
825cac46b1cb Cosmetic / stray trailing whitespace cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents: 422
diff changeset
44 set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"]
561
bdccc83a1c22 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 560
diff changeset
45 set urllog_shorturl_str "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
46
102
5425dc418505 urllog: Entity data is now in UTF-8, but TCL source files are interpreted with current system locale, which may not be UTF-8. We must therefore "convert" the entity mapping string to UTF-8 to be certain of TCL's interpretation of its encoding.
Matti Hamalainen <ccr@tnsp.org>
parents: 101
diff changeset
47
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
48 ### Binding initializations
531
137d61a0b5b7 urllog: Rename two functions.
Matti Hamalainen <ccr@tnsp.org>
parents: 503
diff changeset
49 bind pub - !urlfind urllog_pub_cmd_urlfind
137d61a0b5b7 urllog: Rename two functions.
Matti Hamalainen <ccr@tnsp.org>
parents: 503
diff changeset
50 bind msg - !urlfind urllog_msg_cmd_urlfind
249
d98876dd9ee1 urllog: Rename a function.
Matti Hamalainen <ccr@tnsp.org>
parents: 241
diff changeset
51 bind pubm - *.* urllog_check_line
d98876dd9ee1 urllog: Rename a function.
Matti Hamalainen <ccr@tnsp.org>
parents: 241
diff changeset
52 bind topc - *.* urllog_check_line
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
53
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
54
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
55 #-------------------------------------------------------------------------
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
56 ### Utility functions
560
17183d85ab62 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 545
diff changeset
# Write a message to the bot log, prefixed with the script name.
#   umsg - text to log
# Nothing is written when the global urllog_log_enable is 0.
proc urllog_log {umsg} {
  global urllog_log_enable urllog_name

  # Logging switched off in the configuration: bail out early.
  if {$urllog_log_enable == 0} {
    return
  }

  putlog "$urllog_name: $umsg"
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
64
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
65
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
# Test whether a string consists solely of the ASCII digits 0-9.
#   uarg - string to examine
# Returns 1 when every character is a digit (an empty string also
# yields 1, as there is no offending character), otherwise 0.
proc urllog_isnumber {uarg} {
  # \A and \Z anchor to the very start and end of the string, so any
  # non-digit character anywhere makes the match fail.
  return [regexp -- {\A[0-9]*\Z} $uarg]
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
72
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
73
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
# Send a message to the IRC server using the configured command
# (global urllog_preferredmsg).
#   apublic - 1 to address the channel, anything else to address the nick
#   anick   - nick used as the target for non-public replies
#   achan   - channel used as the target for public replies
#   amsg    - message text
proc urllog_msg {apublic anick achan amsg} {
  global urllog_preferredmsg

  # Choose the recipient first, then emit a single server line.
  if {$apublic == 1} {
    set utarget $achan
  } else {
    set utarget $anick
  }

  putserv "$urllog_preferredmsg $utarget :$amsg"
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
83
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
84
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
# Send a public (channel) message, but only when verbose mode is on.
#   anick - nick handed through to urllog_msg
#   achan - channel the message is sent to
#   amsg  - message text
# Does nothing when the global urllog_verbose is 0.
proc urllog_verb_msg {anick achan amsg} {
  global urllog_verbose

  if {$urllog_verbose == 0} {
    return
  }

  # Verbose output always goes to the channel (apublic = 1).
  urllog_msg 1 $anick $achan $amsg
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
92
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
93
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
# Normalise an encoding / charset name.
#   uencoding - encoding name, optionally with a locale prefix such as
#               "fi_FI." in front (e.g. "fi_FI.UTF-8")
# Returns the name with a leading "xx_YY." locale prefix removed,
# lowercased, and with a leading "iso-" rewritten to "iso"
# (so "ISO-8859-1" becomes "iso8859-1").
proc urllog_sanitize_encoding {uencoding} {
  # The patterns are brace-quoted so that "\." reaches the regexp engine
  # as a literal dot. In a double-quoted string Tcl reduces "\." to ".",
  # which matches ANY character and strips things like "en_US-".
  regsub -- {^[a-z][a-z]_[A-Z][A-Z]\.} $uencoding "" uencoding
  set uencoding [string tolower $uencoding]
  regsub -- {^iso-} $uencoding "iso" uencoding
  return $uencoding
}
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
100
294
2bb9bcfb104a Cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents: 291
diff changeset
101
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
102 #-------------------------------------------------------------------------
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
# Build the bracketed short-URL string for a numeric value.
#   utime - non-negative integer to encode (callers in this file pass the
#           database row id)
# The value is written as three "digits" in base N, where N is the length
# of the global alphabet urllog_shorturl_str, appended to the global
# urllog_shorturl_prefix and wrapped as "[ <prefix><c1><c2><c3> ]".
# Values of N*N*N or more index past the end of the alphabet for the
# first character, which then comes out empty.
proc urllog_get_short {utime} {
  global urllog_shorturl_prefix urllog_shorturl_str

  set ulen [string length $urllog_shorturl_str]
  set usquare [expr {$ulen * $ulen}]

  # All expressions are braced: this avoids a second round of
  # substitution on $utime (an unbraced expr would execute any
  # [bracketed] text in it) and lets Tcl byte-compile the arithmetic.
  set u1 [expr {$utime / $usquare}]
  set utmp [expr {$utime % $usquare}]
  set u2 [expr {$utmp / $ulen}]
  set u3 [expr {$utmp % $ulen}]

  set c1 [string index $urllog_shorturl_str $u1]
  set c2 [string index $urllog_shorturl_str $u2]
  set c3 [string index $urllog_shorturl_str $u3]

  return "\[ ${urllog_shorturl_prefix}${c1}${c2}${c3} \]"
}
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
115
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
116
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
117 #-------------------------------------------------------------------------
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
# Shorten a URL for display.
#   url - the URL string
# Returns the URL unchanged when it is at most urllog_shorturl_orig
# (global) characters long; otherwise returns its first
# urllog_shorturl_orig characters followed by "...".
proc urllog_chop_url {url} {
  global urllog_shorturl_orig

  if {[string length $url] <= $urllog_shorturl_orig} {
    return $url
  }

  # "string range" takes an inclusive end index, so the last kept
  # character is at limit-1. Using the limit itself kept one character
  # too many, making a URL of limit+1 characters longer instead of shorter.
  set ulast [expr {$urllog_shorturl_orig - 1}]
  return "[string range $url 0 $ulast]..."
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
127
241
669842725e2f Cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents: 240
diff changeset
128
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
129 #-------------------------------------------------------------------------
83
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
# Check whether a URL is already stored in the "urls" table and, if so,
# report who originally posted it.
#   urlStr  - URL to look up
#   urlNick - nick of the user who just said the URL
#   urlHost - host of that user (not used by this procedure)
#   urlChan - channel the URL was just said on
# Returns 0 when the URL IS already known (after logging it and possibly
# announcing it on the channel), and 1 when it is NOT in the database.
# Note the sense of the return value: 1 means "new URL".
proc urllog_exists {urlStr urlNick urlHost urlChan} {
  global urldb urlmsg_alreadyknown urllog_shorturl
  global urllog_msg_channels

  # The SQL is brace-quoted on purpose: the SQLite Tcl interface binds
  # $urlStr as a statement parameter, so the untrusted URL text is never
  # spliced into the SQL string and needs no manual quoting.
  urldb eval {
    SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser,
           host AS uhost, chan AS uchan, title AS utitle
    FROM urls WHERE url=$urlStr
  } {
    # The body runs for the first matching row; the result columns are
    # available as the local variables named by the AS aliases.
    urllog_log "URL said by $urlNick ($urlStr) already known"
    if {$urllog_shorturl != 0} {
      set qstr "[urllog_get_short $uid] "
    } else {
      set qstr ""
    }
    append qstr "($uuser/$uchan@[utl_ctime $utime])"
    if {[string length $utitle] > 0} {
      set qstr "$urlmsg_alreadyknown - '$utitle' $qstr"
    } else {
      set qstr "$urlmsg_alreadyknown $qstr"
    }

    # NOTE(review): the channel list is matched against the channel stored
    # with the original entry ($uchan), not the channel the URL was just
    # said on ($urlChan) - confirm that this is intended.
    if {[utl_match_delim_list $urllog_msg_channels $uchan]} {
      urllog_verb_msg $urlNick $urlChan $qstr
    }
    return 0
  }
  return 1
}
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
156
18
1e2232135354 More changes for SQLite support.
Matti Hamalainen <ccr@tnsp.org>
parents: 13
diff changeset
157
83
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
158 #-------------------------------------------------------------------------
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
# Insert a new URL into the "urls" table and confirm it on the channel.
#   urlStr   - URL to store
#   urlNick  - nick of the user who said the URL
#   urlHost  - host of that user
#   urlChan  - channel the URL was said on
#   urlTitle - document title, or "" when none was obtained (stored as NULL)
# Returns 1 on success, 0 when the database insert failed (the error is
# written to the log).
proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} {
  global urldb urllog_shorturl

  # All values are passed as bound parameters: the SQLite Tcl interface
  # resolves the $names inside the brace-quoted SQL itself, so untrusted
  # IRC text is never spliced into the statement.
  #  - CAST keeps the timestamp stored as an integer.
  #  - NULLIF turns an empty title into SQL NULL.
  set utime [unixtime]
  set usql {INSERT INTO urls (utime,url,user,host,chan,title) VALUES (CAST($utime AS INTEGER), $urlStr, $urlNick, $urlHost, $urlChan, NULLIF($urlTitle, ''))}
  if {[catch {urldb eval $usql} uerrmsg]} {
    urllog_log "$uerrmsg on SQL:\n$usql"
    return 0
  }
  set uid [urldb last_insert_rowid]
  urllog_log "Added URL ($urlNick@$urlChan): $urlStr"


  ### Let's say something, to confirm that everything went well.
  if {$urllog_shorturl != 0} {
    set qstr "[urllog_get_short $uid] "
  } else {
    set qstr ""
  }
  if {[string length $urlTitle] > 0} {
    urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr"
  } else {
    urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr"
  }

  return 1
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
190
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
191
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
192 #-------------------------------------------------------------------------
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
193 proc urllog_dorequest { urlNick urlChan urlStr urlStatus urlSCode urlCode urlData urlMeta } {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
194 global urlmsg_ioerror urlmsg_timeout urlmsg_errorgettingdoc
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
195
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
196 upvar 1 $urlStatus ustatus
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
197 upvar 1 $urlSCode uscode
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
198 upvar 1 $urlCode ucode
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
199 upvar 1 $urlData udata
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
200 upvar 1 $urlMeta umeta
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
201
456
102dc89488af urllog: Improve how http headers are formed.
Matti Hamalainen <ccr@tnsp.org>
parents: 425
diff changeset
202 set urlHeaders {}
102dc89488af urllog: Improve how http headers are formed.
Matti Hamalainen <ccr@tnsp.org>
parents: 425
diff changeset
203 lappend urlHeaders "Accept-Encoding" "identity"
457
a7029d65796b urllog: Do not use Connection: keep-alive for production.
Matti Hamalainen <ccr@tnsp.org>
parents: 456
diff changeset
204 # lappend urlHeaders "Connection" "keep-alive"
456
102dc89488af urllog: Improve how http headers are formed.
Matti Hamalainen <ccr@tnsp.org>
parents: 425
diff changeset
205
102dc89488af urllog: Improve how http headers are formed.
Matti Hamalainen <ccr@tnsp.org>
parents: 425
diff changeset
206 if {[catch {set utoken [::http::geturl $urlStr -timeout 6000 -binary 1 -headers $urlHeaders]} uerrmsg]} {
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
207 urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
208 urllog_log "HTTP request failed: $uerrmsg"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
209 return 0
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
210 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
211
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
212 set ustatus [::http::status $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
213 if {$ustatus == "timeout"} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
214 urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
215 urllog_log "HTTP request timed out ($urlStr)"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
216 return 0
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
217 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
218
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
219 if {$ustatus != "ok"} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
220 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::error $utoken])"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
221 urllog_log "Error in HTTP transaction: [::http::error $utoken] ($urlStr)"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
222 return 0
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
223 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
224
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
225 set ustatus [::http::status $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
226 set uscode [::http::code $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
227 set ucode [::http::ncode $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
228 set udata [::http::data $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
229 array set umeta [::http::meta $utoken]
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
230 ::http::cleanup $utoken
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
231
584
9b64f201b3a7 urllog: Use lowercase HTTP metadata keys and sanitize them.
Matti Hamalainen <ccr@tnsp.org>
parents: 583
diff changeset
232 # Sanitize the metadata KEYS
9b64f201b3a7 urllog: Use lowercase HTTP metadata keys and sanitize them.
Matti Hamalainen <ccr@tnsp.org>
parents: 583
diff changeset
233 foreach {ukey uvalue} [array get umeta] {
9b64f201b3a7 urllog: Use lowercase HTTP metadata keys and sanitize them.
Matti Hamalainen <ccr@tnsp.org>
parents: 583
diff changeset
234 set ukey [string tolower $ukey]
9b64f201b3a7 urllog: Use lowercase HTTP metadata keys and sanitize them.
Matti Hamalainen <ccr@tnsp.org>
parents: 583
diff changeset
235 set umeta($ukey) $uvalue
9b64f201b3a7 urllog: Use lowercase HTTP metadata keys and sanitize them.
Matti Hamalainen <ccr@tnsp.org>
parents: 583
diff changeset
236 }
9b64f201b3a7 urllog: Use lowercase HTTP metadata keys and sanitize them.
Matti Hamalainen <ccr@tnsp.org>
parents: 583
diff changeset
237
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
238 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
239 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
240
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
241 #-------------------------------------------------------------------------
327
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
# urllog_validate_url -- normalize a candidate URL and decide whether it
# is acceptable for further processing.
#
# Arguments:
#   urlNick      - nick of the user who posted the URL (used in log messages)
#   urlChan      - channel the URL was seen on (not used by this proc)
#   urlMStr      - NAME of the caller's variable holding the URL; it is
#                  rewritten in place (surrounding parentheses stripped,
#                  missing protocol guessed, urllog_httprep mapping applied)
#   urlMProto    - NAME of the caller's variable that receives the protocol
#   urlMHostName - NAME of the caller's variable that receives the
#                  "host[:port]" part of the URL
#
# Returns:
#   1 if the URL passed all checks, 0 if it was rejected. Every rejection
#   is reported through urllog_log.
proc urllog_validate_url { urlNick urlChan urlMStr urlMProto urlMHostName } {
  global urllog_httprep urllog_shorturl_prefix
  upvar 1 $urlMStr urlStr
  upvar 1 $urlMProto urlProto
  upvar 1 $urlMHostName urlHostName

  ### Hack for removing parentheses around a URL
  if {[regexp {^\((.+)\)$} $urlStr urlMatch urlClean]} {
    set urlStr $urlClean
  }

  ### Clean excess stuff, if any, and attempt to
  ### guess the URL protocol component if it is missing.
  ### NOTE: the patterns are brace-quoted on purpose. Inside double quotes
  ### Tcl turns "\." into a plain ".", which made "www\." / "ftp\." match
  ### ANY character after the prefix instead of a literal dot.
  if {[regexp {([a-z]+)://[^ ]+} $urlStr urlMatch urlProto]} {
    set urlStr $urlMatch
  } elseif {[regexp {www\.[^ ]+} $urlStr urlMatch]} {
    set urlStr "http://$urlMatch"
  } elseif {[regexp {ftp\.[^ ]+} $urlStr urlMatch]} {
    set urlStr "ftp://$urlMatch"
  }

  ### Handle URLs that have an IPv4-address
  if {[regexp {([a-z]+)://([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})} $urlStr urlMatch urlProto ni1 ni2 ni3 ni4]} {
    # Check if the IP is on a local network: loopback (127/8), the
    # RFC 1918 private ranges (10/8, 172.16/12, 192.168/16) and
    # link-local (169.254/16).
    set urlIsLocal [expr {
      $ni1 == 127 || $ni1 == 10 ||
      ($ni1 == 192 && $ni2 == 168) ||
      ($ni1 == 172 && $ni2 >= 16 && $ni2 <= 31) ||
      ($ni1 == 169 && $ni2 == 254)
    }]
    if {$urlIsLocal} {
      urllog_log "URL pointing to local network, ignored ($urlStr)."
      return 0
    }
    if {$ni1 == 0 || $ni1 >= 255 || $ni2 >= 255 || $ni3 >= 255 || $ni4 >= 255} {
      urllog_log "URL pointing to invalid network, ignored ($urlStr)."
      return 0
    }
  }

  ### Check now if we have a ShortURL here ...
  if {[string match "$urllog_shorturl_prefix*" $urlStr]} {
    urllog_log "Ignoring ShortURL from $urlNick: $urlStr"
#    set uud ""
#    set usql "SELECT id AS uid, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE utime=$uud"
#    urldb eval $usql {
#      urllog_verb_msg $urlNick $urlChan "'$utitle' - $uurl"
#      return 1
#    }
    return 0
  }

  ### Get URL protocol component
  set urlProto ""
  if {![regexp {([a-z]+)://} $urlStr urlMatch urlProto]} {
    urllog_log "Broken URL from $urlNick: ($urlStr), no protocol specifier."
    return 0
  }

  ### Is it a http or ftp url?
  if {$urlProto != "http" && $urlProto != "https" && $urlProto != "ftp"} {
    urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED protocol class ($urlProto)."
    return 0
  }

  ### Extract "host[:port]": the third "/"-separated field of the URL.
  ### A query ("?") or fragment ("#") may directly follow the host with
  ### no "/" in between, so cut the field there as well; otherwise a ":"
  ### inside the query would be mistaken for a port separator.
  set urlAuthority [lindex [split $urlStr "/"] 2]
  regexp {^[^?#]*} $urlAuthority urlHostName

  ### Check the PORT (if the ":" is there). When there is no ":" the
  ### last field equals the whole host name and the check is skipped.
  set urlPort [lindex [split $urlHostName ":"] end]
  if {![urllog_isnumber $urlPort] && $urlPort != "" && $urlPort != $urlHostName} {
    urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $urlPort"
    return 0
  }

  ### Apply the configured substring replacements to the final URL
  set urlStr [string map $urllog_httprep $urlStr]
  return 1
}
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
316
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
317
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
318 #-------------------------------------------------------------------------
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
319 proc urllog_check_url {urlStr urlNick urlHost urlChan} {
299
1ff281e821a3 urllog: Make rasiatube hack configurable.
Matti Hamalainen <ccr@tnsp.org>
parents: 298
diff changeset
320 global urllog_encoding http_tls_support urlmsg_errorgettingdoc
581
148c7553c50f urllog: Remove rasiatube hack setting, it's not used.
Matti Hamalainen <ccr@tnsp.org>
parents: 579
diff changeset
321 global urllog_extra_checks urllog_extra_strict
3
8003090caa35 Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
322
91
6f4bfd8e9447 urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents: 90
diff changeset
323 ### Does the URL already exist?
6f4bfd8e9447 urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents: 90
diff changeset
324 if {![urllog_exists $urlStr $urlNick $urlHost $urlChan]} {
6f4bfd8e9447 urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents: 90
diff changeset
325 return 1
6f4bfd8e9447 urllog: Reorder code and make it simpler by removing duplicate checks.
Matti Hamalainen <ccr@tnsp.org>
parents: 90
diff changeset
326 }
424
825cac46b1cb Cosmetic / stray trailing whitespace cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents: 422
diff changeset
327
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
328 ### Validate URL components, etc.
327
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
329 set urlProto ""
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
330 set urlHostName ""
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
331 if {![urllog_validate_url $urlNick $urlChan urlStr urlProto urlHostName]} {
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
332 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
333 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
334
267
da239a953e24 urllog: Change some setting names, etc.
Matti Hamalainen <ccr@tnsp.org>
parents: 264
diff changeset
335 ### Do we perform additional checks?
327
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
336 if {$urllog_extra_checks == 0 || !(($http_tls_support != 0 && $urlProto == "https") || $urlProto == "http")} {
230
3e3756b113a1 urllog: Cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents: 229
diff changeset
337 # No optional checks, or it's not http/https.
306
9858b93387a2 urllog: 100L.
Matti Hamalainen <ccr@tnsp.org>
parents: 304
diff changeset
338 if {$urllog_extra_strict == 0} {
304
f1589fe20732 urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents: 302
diff changeset
339 # Strict checking disabled, so add the URL, if it does not exist already.
f1589fe20732 urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents: 302
diff changeset
340 urllog_addurl $urlStr $urlNick $urlHost $urlChan ""
f1589fe20732 urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents: 302
diff changeset
341 return 1
327
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
342 } elseif {$http_tls_support == 0 && $urlProto == "https"} {
304
f1589fe20732 urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents: 302
diff changeset
343 # Strict ENABLED: If TLS support is disabled and we have https, do nothing
f1589fe20732 urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents: 302
diff changeset
344 return 1
327
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
345 } elseif {$urlProto != "http" && $urlProto != "https"} {
304
f1589fe20732 urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents: 302
diff changeset
346 # Strict ENABLED: It's not http, or https
f1589fe20732 urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents: 302
diff changeset
347 return 1
f1589fe20732 urllog: Added urllog_extra_strict option.
Matti Hamalainen <ccr@tnsp.org>
parents: 302
diff changeset
348 }
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
349 }
7
50b52294e93e urllog: Strip &rlm; entities from titles; Some work on SSL/https support.
Matti Hamalainen <ccr@tnsp.org>
parents: 4
diff changeset
350
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
351 ### Does the document pointed by the URL exist?
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
352 if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
353 return 1
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
354 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
355
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
356 ### Handle redirects
583
d5fc3ecee4c7 urllog: Handle 303 redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 581
diff changeset
357 if {$ucode >= 301 && $ucode <= 303} {
584
9b64f201b3a7 urllog: Use lowercase HTTP metadata keys and sanitize them.
Matti Hamalainen <ccr@tnsp.org>
parents: 583
diff changeset
358 set nurlStr $umeta(location)
327
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
359 if {![regexp "\[a-z\]+://" $nurlStr]} {
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
360 if {[string range $nurlStr 0 0] != "/"} {
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
361 append nurlStr "/"
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
362 }
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
363 set nurlStr "${urlProto}://${urlHostName}${nurlStr}"
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
364 }
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
365 urllog_log "Redirection: $urlStr -> $nurlStr"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
366 set urlStr $nurlStr
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
367
327
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
368 if {![urllog_validate_url $urlNick $urlChan urlStr urlProto urlHostName]} {
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
369 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
370 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
371
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
372 if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
373 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
374 }
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
375 }
3
8003090caa35 Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
376
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
377 ### Handle 2nd level redirects
583
d5fc3ecee4c7 urllog: Handle 303 redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 581
diff changeset
378 if {$ucode >= 301 && $ucode <= 303} {
584
9b64f201b3a7 urllog: Use lowercase HTTP metadata keys and sanitize them.
Matti Hamalainen <ccr@tnsp.org>
parents: 583
diff changeset
379 set nurlStr $umeta(location)
327
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
380 if {![regexp "\[a-z\]+://" $nurlStr]} {
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
381 if {[string range $nurlStr 0 0] != "/"} {
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
382 append nurlStr "/"
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
383 }
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
384 set nurlStr "${urlProto}://${urlHostName}${nurlStr}"
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
385 }
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
386 urllog_log "Redirection #2: $urlStr -> $nurlStr"
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
387 set urlStr $nurlStr
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
388
327
a5282cdc56e6 urllog: Fix redirection handling for HTTP 1.1.
Matti Hamalainen <ccr@tnsp.org>
parents: 319
diff changeset
389 if {![urllog_validate_url $urlNick $urlChan urlStr urlProto urlHostName]} {
251
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
390 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
391 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
392
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
393 if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} {
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
394 return 1
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
395 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
396 }
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
397
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
398 # Final document
e59f0c3ea0f4 urllog: Handle first and second level redirects.
Matti Hamalainen <ccr@tnsp.org>
parents: 250
diff changeset
399 if {$ucode >= 200 && $ucode <= 205} {
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
400 set uenc_doc ""
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
401 set uenc_http ""
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
402 set uencoding ""
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
403
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
404 # Get information about specified character encodings
584
9b64f201b3a7 urllog: Use lowercase HTTP metadata keys and sanitize them.
Matti Hamalainen <ccr@tnsp.org>
parents: 583
diff changeset
405 if {[info exists umeta(Content-Type)] && [regexp -nocase {charset\s*=\s*([a-z0-9._-]+)} $umeta(content-type) umatches uenc_http]} {
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
406 # Found character set encoding information in HTTP headers
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
407 }
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
408
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
409 if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/\?>" $udata umatches uenc_doc]} {
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
410 # Found old style HTML meta tag with character set information
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
411 } elseif {[regexp -nocase -- "<meta.\*\?charset=\"(\[^\"\]*)\".\*\?/\?>" $udata umatches uenc_doc]} {
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
412 # Found HTML5 style meta tag with character set information
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
413 }
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
414
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
415 # Make sanitized versions of the encoding strings
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
416 set uenc_http2 [urllog_sanitize_encoding $uenc_http]
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
417 set uenc_doc2 [urllog_sanitize_encoding $uenc_doc]
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
418
311
adc519c72f53 urllog: Various cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 306
diff changeset
419 # Check if the document has specified encoding
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
420 # KLUDGE!
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
421 set uencoding $uenc_http2
318
5d886e2137d5 urllog: Fix character set conversion a bit.
Matti Hamalainen <ccr@tnsp.org>
parents: 315
diff changeset
422 if {$uencoding == "" && $uenc_doc2 != ""} {
5d886e2137d5 urllog: Fix character set conversion a bit.
Matti Hamalainen <ccr@tnsp.org>
parents: 315
diff changeset
423 set uencoding $uenc_doc2
5d886e2137d5 urllog: Fix character set conversion a bit.
Matti Hamalainen <ccr@tnsp.org>
parents: 315
diff changeset
424 } elseif {$uencoding == ""} {
424
825cac46b1cb Cosmetic / stray trailing whitespace cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents: 422
diff changeset
425 # If _NO_ known encoding of any kind, assume the default of iso8859-1
86
4c2b6482c08c urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 84
diff changeset
426 set uencoding "iso8859-1"
4c2b6482c08c urllog: Different strategy for charset encoding conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 84
diff changeset
427 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
428
311
adc519c72f53 urllog: Various cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 306
diff changeset
429 urllog_log "Charsets: http='$uenc_http', doc='$uenc_doc' / sanitized http='$uenc_http2', doc='$uenc_doc2' -> '$uencoding'"
adc519c72f53 urllog: Various cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 306
diff changeset
430
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
431 # Get the document title, if any
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
432 set urlTitle ""
470
2faf2eb18f26 urllog: Add support for Twitter meta titles.
Matti Hamalainen <ccr@tnsp.org>
parents: 469
diff changeset
433
473
d155cec91375 urllog: Fixes to Twitter title matching.
Matti Hamalainen <ccr@tnsp.org>
parents: 472
diff changeset
434 if {[regexp -nocase -- "<meta name=\"twitter:title\" content=\"(.\*\?)\"\\s\*\/\?>" $udata umatches urlTitle]} {
470
2faf2eb18f26 urllog: Add support for Twitter meta titles.
Matti Hamalainen <ccr@tnsp.org>
parents: 469
diff changeset
435 # ...
2faf2eb18f26 urllog: Add support for Twitter meta titles.
Matti Hamalainen <ccr@tnsp.org>
parents: 469
diff changeset
436 } elseif {[regexp -nocase -- "<title.\*\?>(.\*\?)</title>" $udata umatches urlTitle]} {
2faf2eb18f26 urllog: Add support for Twitter meta titles.
Matti Hamalainen <ccr@tnsp.org>
parents: 469
diff changeset
437 # ...
2faf2eb18f26 urllog: Add support for Twitter meta titles.
Matti Hamalainen <ccr@tnsp.org>
parents: 469
diff changeset
438 }
313
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
439
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
440 # If facebook, get meta info
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
441 if {[regexp -nocase -- "(http|https):\/\/www.facebook.com" $urlStr]} {
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
442 if {[regexp -nocase -- "<meta name=\"description\" content=\"(.\*\?)\"" $udata umatches urlTmp]} {
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
443 if {$urlTitle != ""} { append urlTitle " :: " }
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
444 append urlTitle $urlTmp
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
445 }
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
446 }
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
447
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
448 # If character set conversion is required, do it now
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
449 if {$urlTitle != "" && $uencoding != ""} {
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
450 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} {
8175ef52889b urllog: Improve URL title functionality.
Matti Hamalainen <ccr@tnsp.org>
parents: 312
diff changeset
451 urllog_log "Error in charset conversion: $cerrmsg"
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
452 }
150
52350ed97775 urllog: Cleanups, rename/move some global variables.
Matti Hamalainen <ccr@tnsp.org>
parents: 136
diff changeset
453
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
454 # Convert some HTML entities to plaintext and do some cleanup
291
54d34d086b47 urllog: Use the utility lib for entity conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 289
diff changeset
455 set utmp [utl_convert_html_ent $urlTitle]
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
456 regsub -all "\r|\n|\t" $utmp " " utmp
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
457 regsub -all " *" $utmp " " utmp
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
458 set urlTitle [string trim $utmp]
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
459 }
3
8003090caa35 Lots of code cleanups, add "fixer" for RasiaTube links (which suck) to point directly to Youtube.
Matti Hamalainen <ccr@tnsp.org>
parents: 0
diff changeset
460
83
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
461 # Check if the URL already exists, just in case we had some redirects
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
462 if {[urllog_exists $urlStr $urlNick $urlHost $urlChan]} {
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
463 urllog_addurl $urlStr $urlNick $urlHost $urlChan $urlTitle
f171a9fb7b7b urllog: Split urllog_add function to urllog_exists for checking whether given URL already exists in the database. Use urllog_exists where appropriate.
Matti Hamalainen <ccr@tnsp.org>
parents: 82
diff changeset
464 }
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
465 return 1
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
466 } else {
116
4f3edcf72987 urllog: Improvements in document / HTTP encoding handling and conversion.
Matti Hamalainen <ccr@tnsp.org>
parents: 115
diff changeset
467 urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($ucode)"
224
aaf433ab696a urllog: Improve error messages a bit.
Matti Hamalainen <ccr@tnsp.org>
parents: 223
diff changeset
468 urllog_log "Error fetching document: status=$ustatus, code=$ucode, scode=$uscode, url=$urlStr"
28
a59e312b1513 Remove tabs and reindent.
Matti Hamalainen <ccr@tnsp.org>
parents: 27
diff changeset
469 }
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
470 }
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
471
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
472
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
473 #-------------------------------------------------------------------------
219
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
474
4e09bcc48851 urllog: Add settings for specifying channels where URL logging is active, and where !urlfind functionality works (separately, if so desired.)
Matti Hamalainen <ccr@tnsp.org>
parents: 218
diff changeset
475
249
d98876dd9ee1 urllog: Rename a function.
Matti Hamalainen <ccr@tnsp.org>
parents: 241
diff changeset
# Scan one line of text for URL-looking tokens and pass each of them to
# urllog_check_url.
#
# Arguments:
#   unick - nick of the speaker; the wildcard "*" is rejected and logged
#   uhost - host of the speaker, forwarded to urllog_check_url
#   uhand - handle of the speaker (not used by this proc)
#   uchan - channel the text was seen on; it must match the
#           urllog_log_channels setting, otherwise nothing is checked
#   utext - the text, split on single space characters into tokens
#
# Returns 0 in all cases.
proc urllog_check_line {unick uhost uhand uchan utext} {
  global urllog_log_channels

  ### Check the nick
  if {$unick == "*"} {
    urllog_log "urllog_check_line: Nick was wc, this should not happen."
    return 0
  }

  ### Check the channel
  if {[utl_match_delim_list $urllog_log_channels $uchan]} {
    ### Do the URL checking
    foreach str [split $utext " "] {
      # The pattern is brace-quoted on purpose: inside double quotes Tcl's
      # backslash substitution reduces "\." to a bare ".", which made the
      # "www."/"ftp." alternative accept ANY character after the prefix
      # (e.g. "wwwhat" or "ftpd"). In braces the regexp engine receives
      # "\." and requires a literal dot.
      if {[regexp {([a-z]+://[^[:space:]]+|^(www|ftp)\.[^[:space:]]+)} $str]} {
        # The whole token is handed over, not just the matched part
        urllog_check_url $str $unick $uhost $uchan
      }
    }
  }

  return 0
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
497
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
498
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
499 #-------------------------------------------------------------------------
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
### Parse arguments, find and show the results
#
# Arguments:
#   unick   - nick of the requester (used for logging and for the reply)
#   uhand   - handle of the requester (used for logging only)
#   uchan   - channel for the reply; the private handler passes ""
#   utext   - search expression: space separated tokens, each optionally
#             prefixed with
#               +  url or title must contain the pattern (same as no prefix)
#               -  neither url nor title may contain the pattern
#               %  user must contain the pattern
#               @  reserved, currently ignored
#   upublic - 0 for a private query, non-zero for a public one; selects the
#             result limit and is passed on to urllog_msg
#
# Returns 0 in all cases.
proc urllog_find {unick uhand uchan utext upublic} {
  global urllog_shorturl urldb
  global urllog_showmax_pub urllog_showmax_priv urlmsg_nomatch

  # Pick the maximum number of results. The urllog_showmax_* settings are
  # honoured when they hold a positive integer; otherwise the previous
  # built-in defaults (5 private / 3 public) are used.
  if {$upublic == 0} {
    set ulimit 5
    set ucfgvar urllog_showmax_priv
  } else {
    set ulimit 3
    set ucfgvar urllog_showmax_pub
  }
  if {[info exists $ucfgvar]} {
    set ucfg [set $ucfgvar]
    if {[string is integer -strict $ucfg] && $ucfg > 0} {
      set ulimit $ucfg
    }
  }

  ### Parse the given command
  urllog_log "$unick/$uhand searched URL: $utext"

  set ftokens [split $utext " "]
  set fpatlist ""
  foreach ftoken $ftokens {
    # Consecutive spaces produce empty tokens; they carry no constraint
    if {$ftoken eq ""} {
      continue
    }

    set fprefix [string range $ftoken 0 0]
    set fpattern [string range $ftoken 1 end]

    # A lone prefix character has no pattern. Skip it, because '%%' would
    # match everything (and "-" alone would then exclude every row).
    if {$fpattern eq "" && [string first $fprefix "-%@+"] >= 0} {
      continue
    }

    # NOTE(review): relies on utl_escape to make the pattern safe inside a
    # single-quoted SQL literal -- confirm against utillib.
    set qpattern "'%[utl_escape $fpattern]%'"

    if {$fprefix == "-"} {
      # Exclusion is the negation of (url LIKE x OR title LIKE x), i.e.
      # both columns must NOT match. A NULL title cannot contain the
      # pattern, so it is accepted explicitly (NULL NOT LIKE x is NULL).
      lappend fpatlist "(url NOT LIKE $qpattern AND (title IS NULL OR title NOT LIKE $qpattern))"
    } elseif {$fprefix == "%"} {
      lappend fpatlist "user LIKE $qpattern"
    } elseif {$fprefix == "@"} {
      # Reserved prefix: no constraint is generated for it
    } elseif {$fprefix == "+"} {
      lappend fpatlist "(url LIKE $qpattern OR title LIKE $qpattern)"
    } else {
      # No prefix: the whole token is the pattern
      set qpattern "'%[utl_escape $ftoken]%'"
      lappend fpatlist "(url LIKE $qpattern OR title LIKE $qpattern)"
    }
  }

  if {[llength $fpatlist] > 0} {
    set fquery "WHERE [join $fpatlist " AND "]"
  } else {
    set fquery ""
  }

  set iresults 0
  set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost FROM urls $fquery ORDER BY utime DESC LIMIT $ulimit"
  # Each result row sets the aliased column names as local variables
  urldb eval $usql {
    incr iresults
    set shortURL $uurl
    if {$urllog_shorturl != 0 && $uid != ""} {
      set shortURL "$shortURL [urllog_get_short $uid]"
    }
    urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[utl_ctime $utime])"
  }

  if {$iresults == 0} {
    # If no URLs were found
    urllog_msg $upublic $unick $uchan $urlmsg_nomatch
  }

  return 0
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
559
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
560
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
561 #-------------------------------------------------------------------------
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
562 ### Bound functions for finding URLs
531
137d61a0b5b7 urllog: Rename two functions.
Matti Hamalainen <ccr@tnsp.org>
parents: 503
diff changeset
# Channel-side entry point for the URL search. The search is only run when
# the channel matches the urllog_search_channels setting; the result of
# urllog_find is returned in that case, otherwise 0.
proc urllog_pub_cmd_urlfind {unick uhost uhand uchan utext} {
  global urllog_search_channels

  # Guard: searching is not enabled for this channel
  if {![utl_match_delim_list $urllog_search_channels $uchan]} {
    return 0
  }

  # Last argument 1 = public query
  return [urllog_find $unick $uhand $uchan $utext 1]
}
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
571
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
572
531
137d61a0b5b7 urllog: Rename two functions.
Matti Hamalainen <ccr@tnsp.org>
parents: 503
diff changeset
# Private-message entry point for the URL search. There is no channel in
# this context, so an empty channel name is passed and the query is flagged
# as non-public (0). Always returns 0.
proc urllog_msg_cmd_urlfind {unick uhost uhand utext} {
  set nochannel ""
  urllog_find $unick $uhand $nochannel $utext 0
  return 0
}
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
577
560
17183d85ab62 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 545
diff changeset
578
17183d85ab62 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 545
diff changeset
579 #-------------------------------------------------------------------------
17183d85ab62 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 545
diff changeset
580 # Script initialization
17183d85ab62 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 545
diff changeset
581 #-------------------------------------------------------------------------
570
0e3ee1f51c80 urllog: Cosmetics.
Matti Hamalainen <ccr@tnsp.org>
parents: 563
diff changeset
### Initialization messages
putlog $urllog_message


### Miscellaneous init messages
if {$urllog_extra_checks != 0} { putlog " - Additional URL validity checks enabled." }
if {$urllog_verbose != 0} { putlog " - Verbose mode enabled." }


### HTTP module initialization
# User agent: the configured one if set and non-empty, else "name/version"
if {![info exists http_user_agent] || $http_user_agent == ""} {
  ::http::config -useragent "$urllog_name/$urllog_version"
} else {
  ::http::config -useragent $http_user_agent
}

# Optional HTTP proxy
if {[info exists http_use_proxy] && $http_use_proxy != 0} {
  ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
  putlog " - Using proxy $http_proxy_host:$http_proxy_port"
}

# Optional https support through the tls package
if {[info exists http_tls_support] && $http_tls_support != 0} {
  package require tls
  ::http::register https 443 [list ::tls::socket \
    -request true -require true \
    -ssl2 false -ssl3 false \
    -tls1 true -tls1.1 true -tls1.2 true \
    -cadir $http_tls_cadir -autoservername true]
  putlog " - TLS/SSL support enabled."
}


### SQLite database initialization
# Opening the database creates the "urldb" command; failure is fatal
if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} {
  putlog "Could not open SQLite3 database '${urllog_db_file}': ${uerrmsg}"
  exit 2
}
17183d85ab62 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 545
diff changeset
620
17183d85ab62 urllog: Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 545
diff changeset
621
0
1c4e2814cd41 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
622 # end of script