Mercurial > hg > egg-tcls
comparison urllog.tcl @ 571:d4d2fda12308
urllog: Improve URL parsing/validation and protocol guessing.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sun, 27 Sep 2020 14:51:19 +0300 |
parents | 0e3ee1f51c80 |
children | 295c225e3152 |
comparison
equal
deleted
inserted
replaced
570:0e3ee1f51c80 | 571:d4d2fda12308 |
---|---|
239 global urllog_shorturl_prefix urllog_shorturl urllog_check_tld | 239 global urllog_shorturl_prefix urllog_shorturl urllog_check_tld |
240 upvar 1 $urlMStr urlStr | 240 upvar 1 $urlMStr urlStr |
241 upvar 1 $urlMProto urlProto | 241 upvar 1 $urlMProto urlProto |
242 upvar 1 $urlMHostName urlHostName | 242 upvar 1 $urlMHostName urlHostName |
243 | 243 |
244 ### Try to guess the URL protocol component (if it is missing) | |
245 set u_checktld 1 | 244 set u_checktld 1 |
246 if {![string match "http://*" $urlStr] && ![string match "https://*" $urlStr] && ![string match "ftp://*" $urlStr] && ![string match "*://*" $urlStr]} { | 245 |
247 if {[string match "*www.*" $urlStr]} { | 246 ### Hack for removing parenthesis around an URL |
248 set urlStr "http://$urlStr" | 247 if {[regexp {^\((.+)\)$} $urlStr urlMatch urlClean]} { |
249 } elseif {[string match "*ftp.*" $urlStr]} { | 248 set urlStr $urlClean |
250 set urlStr "ftp://$urlStr" | 249 } |
251 } | 250 |
251 ### Clean excess stuff, if any, and attempt to | |
252 ### guess the URL protocol component if it is missing | |
253 if {[regexp "(\[a-z\]+)://\[^ \]+" $urlStr urlMatch urlProto]} { | |
254 set urlStr $urlMatch | |
255 } elseif {[regexp "www\.\[^ \]+" $urlStr urlMatch]} { | |
256 set urlStr "http://$urlMatch" | |
257 } elseif {[regexp "ftp\.\[^ \]+" $urlStr urlMatch]} { | |
258 set urlStr "ftp://$urlMatch" | |
252 } | 259 } |
253 | 260 |
254 ### Handle URLs that have an IPv4-address | 261 ### Handle URLs that have an IPv4-address |
255 if {[regexp "(\[a-z\]+)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr urlMatch urlProto ni1 ni2 ni3 ni4]} { | 262 if {[regexp "(\[a-z\]+)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr urlMatch urlProto ni1 ni2 ni3 ni4]} { |
256 # Check if the IP is on local network | 263 # Check if the IP is on local network |
257 if {$ni1 == 127 || $ni1 == 10 || ($ni1 == 192 && $ni2 == 168) || $ni1 == 0} { | 264 if {$ni1 == 127 || $ni1 == 10 || ($ni1 == 192 && $ni2 == 168) || $ni1 == 0} { |
258 urllog_log "URL pointing to local or invalid network, ignored ($urlStr)." | 265 urllog_log "URL pointing to local or invalid network, ignored ($urlStr)." |
266 return 0 | |
267 } | |
268 if {$ni1 >= 255 || $ni2 >= 255 || $ni3 >= 255 || $ni4 >= 255} { | |
269 urllog_log "URL pointing to invalid network, ignored ($urlStr)." | |
259 return 0 | 270 return 0 |
260 } | 271 } |
261 # Skip TLD check for URLs with IP address | 272 # Skip TLD check for URLs with IP address |
262 set u_checktld 0 | 273 set u_checktld 0 |
263 } | 274 } |