# HG changeset patch # User Matti Hamalainen # Date 1611522781 -7200 # Node ID a5dc31f5b44e7a7045c07ba580563727da50f476 # Parent 9b64f201b3a72c9a2e7a3a1571eb8722a4aaf1ad urllog: Clean up redirection handling and improve error checking and handling. diff -r 9b64f201b3a7 -r a5dc31f5b44e urllog.tcl --- a/urllog.tcl Sun Jan 24 21:47:27 2021 +0200 +++ b/urllog.tcl Sun Jan 24 23:13:01 2021 +0200 @@ -190,14 +190,30 @@ #------------------------------------------------------------------------- -proc urllog_dorequest { urlNick urlChan urlStr urlStatus urlSCode urlCode urlData urlMeta } { +proc urllog_clear_request { urlStatus urlSCode urlCode urlData urlMeta } { + upvar $urlStatus ustatus + upvar $urlSCode uscode + upvar $urlCode ucode + upvar $urlData udata + upvar $urlMeta umeta + + unset ustatus + unset uscode + unset ucode + unset udata + array unset umeta +} + + +#------------------------------------------------------------------------- +proc urllog_do_request { urlNick urlChan urlStr urlStatus urlSCode urlCode urlData urlMeta } { global urlmsg_ioerror urlmsg_timeout urlmsg_errorgettingdoc - upvar 1 $urlStatus ustatus - upvar 1 $urlSCode uscode - upvar 1 $urlCode ucode - upvar 1 $urlData udata - upvar 1 $urlMeta umeta + upvar $urlStatus ustatus + upvar $urlSCode uscode + upvar $urlCode ucode + upvar $urlData udata + upvar $urlMeta umeta set urlHeaders {} lappend urlHeaders "Accept-Encoding" "identity" @@ -242,9 +258,9 @@ proc urllog_validate_url { urlNick urlChan urlMStr urlMProto urlMHostName } { global urlmsg_nosuchhost urllog_httprep global urllog_shorturl_prefix urllog_shorturl - upvar 1 $urlMStr urlStr - upvar 1 $urlMProto urlProto - upvar 1 $urlMHostName urlHostName + upvar $urlMStr urlStr + upvar $urlMProto urlProto + upvar $urlMHostName urlHostName ### Hack for removing parenthesis around an URL if {[regexp {^\((.+)\)$} $urlStr urlMatch urlClean]} { @@ -316,6 +332,51 @@ #------------------------------------------------------------------------- +proc urllog_handle_redirect {urlNick urlHost urlChan urlRedirLevel urlProto urlHostName urlStr urlStatus urlSCode urlCode urlData urlMeta} { + + upvar $urlProto uproto + upvar $urlHostName uhostname + upvar $urlStr ustr + upvar $urlStatus ustatus + upvar $urlSCode uscode + upvar $urlCode ucode + upvar $urlData udata + upvar $urlMeta umeta + + if {$ucode >= 301 && $ucode <= 303} { + if {[llength [array get umeta "location"]] == 0} { + urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc (invalid redirect without Location header)" + urllog_log "Error fetching document: status=$ustatus, code=$ucode, scode=$uscode, url=$ustr : Invalid redirect without Location header (redirLevel=${urlRedirLevel}" + return 0 + } + + set nustr $umeta(location) + if {![regexp "\[a-z\]+://" $nustr]} { + if {[string range $nustr 0 0] != "/"} { + append nustr "/" + } + set nustr "${uproto}://${uhostname}${nustr}" + } + + urllog_log "Redirection #${urlRedirLevel}: $ustr -> $nustr" + set ustr $nustr + + if {![urllog_validate_url $urlNick $urlChan ustr uproto uhostname]} { + return 0 + } + + urllog_clear_request ustatus uscode ucode udata umeta + if {![urllog_do_request $urlNick $urlChan $ustr ustatus uscode ucode udata umeta]} { + urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($uscode)" + return 0 + } + } + + return 1 +} + + +#------------------------------------------------------------------------- proc urllog_check_url {urlStr urlNick urlHost urlChan} { global urllog_encoding http_tls_support urlmsg_errorgettingdoc global urllog_extra_checks urllog_extra_strict @@ -349,50 +410,17 @@ } ### Does the document pointed by the URL exist? - if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} { + if {![urllog_do_request $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} { return 1 } - ### Handle redirects - if {$ucode >= 301 && $ucode <= 303} { - set nurlStr $umeta(location) - if {![regexp "\[a-z\]+://" $nurlStr]} { - if {[string range $nurlStr 0 0] != "/"} { - append nurlStr "/" - } - set nurlStr "${urlProto}://${urlHostName}${nurlStr}" - } - urllog_log "Redirection: $urlStr -> $nurlStr" - set urlStr $nurlStr - - if {![urllog_validate_url $urlNick $urlChan urlStr urlProto urlHostName]} { - return 1 - } - - if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} { - return 1 - } + ### Handle redirects of 2 levels + if {![urllog_handle_redirect $urlNick $urlHost $urlChan 1 urlProto urlHostName urlStr ustatus uscode ucode udata umeta]} { + return 1 } - ### Handle 2nd level redirects - if {$ucode >= 301 && $ucode <= 303} { - set nurlStr $umeta(location) - if {![regexp "\[a-z\]+://" $nurlStr]} { - if {[string range $nurlStr 0 0] != "/"} { - append nurlStr "/" - } - set nurlStr "${urlProto}://${urlHostName}${nurlStr}" - } - urllog_log "Redirection #2: $urlStr -> $nurlStr" - set urlStr $nurlStr - - if {![urllog_validate_url $urlNick $urlChan urlStr urlProto urlHostName]} { - return 1 - } - - if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} { - return 1 - } + if {![urllog_handle_redirect $urlNick $urlHost $urlChan 2 urlProto urlHostName urlStr ustatus uscode ucode udata umeta]} { + return 1 } # Final document @@ -464,8 +492,9 @@ } return 1 } else { - urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($ucode)" + urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($uscode)" urllog_log "Error fetching document: status=$ustatus, code=$ucode, scode=$uscode, url=$urlStr" + return 0 } }