changeset 585:a5dc31f5b44e

urllog: Clean up redirection handling and improve error checking and handling.
author Matti Hamalainen <ccr@tnsp.org>
date Sun, 24 Jan 2021 23:13:01 +0200
parents 9b64f201b3a7
children 23f2c71fdb90
files urllog.tcl
diffstat 1 files changed, 78 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/urllog.tcl	Sun Jan 24 21:47:27 2021 +0200
+++ b/urllog.tcl	Sun Jan 24 23:13:01 2021 +0200
@@ -190,14 +190,30 @@
 
 
 #-------------------------------------------------------------------------
-proc urllog_dorequest { urlNick urlChan urlStr urlStatus urlSCode urlCode urlData urlMeta } {
+proc urllog_clear_request { urlStatus urlSCode urlCode urlData urlMeta } {
+  upvar $urlStatus ustatus
+  upvar $urlSCode uscode
+  upvar $urlCode ucode
+  upvar $urlData udata
+  upvar $urlMeta umeta
+
+  unset ustatus
+  unset uscode
+  unset ucode
+  unset udata
+  array unset umeta
+}
+
+
+#-------------------------------------------------------------------------
+proc urllog_do_request { urlNick urlChan urlStr urlStatus urlSCode urlCode urlData urlMeta } {
   global urlmsg_ioerror urlmsg_timeout urlmsg_errorgettingdoc
 
-  upvar 1 $urlStatus ustatus
-  upvar 1 $urlSCode uscode
-  upvar 1 $urlCode ucode
-  upvar 1 $urlData udata
-  upvar 1 $urlMeta umeta
+  upvar $urlStatus ustatus
+  upvar $urlSCode uscode
+  upvar $urlCode ucode
+  upvar $urlData udata
+  upvar $urlMeta umeta
 
   set urlHeaders {}
   lappend urlHeaders "Accept-Encoding" "identity"
@@ -242,9 +258,9 @@
 proc urllog_validate_url { urlNick urlChan urlMStr urlMProto urlMHostName } {
   global urlmsg_nosuchhost urllog_httprep
   global urllog_shorturl_prefix urllog_shorturl
-  upvar 1 $urlMStr urlStr
-  upvar 1 $urlMProto urlProto
-  upvar 1 $urlMHostName urlHostName
+  upvar $urlMStr urlStr
+  upvar $urlMProto urlProto
+  upvar $urlMHostName urlHostName
 
   ### Hack for removing parenthesis around an URL
   if {[regexp {^\((.+)\)$} $urlStr urlMatch urlClean]} {
@@ -316,6 +332,51 @@
 
 
 #-------------------------------------------------------------------------
+proc urllog_handle_redirect {urlNick urlHost urlChan urlRedirLevel urlProto urlHostName urlStr urlStatus urlSCode urlCode urlData urlMeta} {
+
+  upvar $urlProto uproto
+  upvar $urlHostName uhostname
+  upvar $urlStr ustr
+  upvar $urlStatus ustatus
+  upvar $urlSCode uscode
+  upvar $urlCode ucode
+  upvar $urlData udata
+  upvar $urlMeta umeta
+
+  if {$ucode >= 301 && $ucode <= 303} {
+    if {[llength [array get umeta "location"]] == 0} {
+      urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc (invalid redirect without Location header)"
+      urllog_log "Error fetching document: status=$ustatus, code=$ucode, scode=$uscode, url=$ustr : Invalid redirect without Location header (redirLevel=${urlRedirLevel}"
+      return 0
+    }
+
+    set nustr $umeta(location)
+    if {![regexp "\[a-z\]+://" $nustr]} {
+      if {[string range $nustr 0 0] != "/"} {
+        append nustr "/"
+      }
+      set nustr "${uproto}://${uhostname}${nustr}"
+    }
+
+    urllog_log "Redirection #${urlRedirLevel}: $ustr -> $nustr"
+    set ustr $nustr
+
+    if {![urllog_validate_url $urlNick $urlChan ustr uproto uhostname]} {
+      return 0
+    }
+
+    urllog_clear_request ustatus uscode ucode udata umeta
+    if {![urllog_do_request $urlNick $urlChan $ustr ustatus uscode ucode udata umeta]} {
+      urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($uscode)"
+      return 0
+    }
+  }
+
+  return 1
+}
+
+
+#-------------------------------------------------------------------------
 proc urllog_check_url {urlStr urlNick urlHost urlChan} {
   global urllog_encoding http_tls_support urlmsg_errorgettingdoc
   global urllog_extra_checks urllog_extra_strict
@@ -349,50 +410,17 @@
   }
 
   ### Does the document pointed by the URL exist?
-  if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} {
+  if {![urllog_do_request $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} {
     return 1
   }
 
-  ### Handle redirects
-  if {$ucode >= 301 && $ucode <= 303} {
-    set nurlStr $umeta(location)
-    if {![regexp "\[a-z\]+://" $nurlStr]} {
-      if {[string range $nurlStr 0 0] != "/"} {
-        append nurlStr "/"
-      }
-      set nurlStr "${urlProto}://${urlHostName}${nurlStr}"
-    }
-    urllog_log "Redirection: $urlStr -> $nurlStr"
-    set urlStr $nurlStr
-
-    if {![urllog_validate_url $urlNick $urlChan urlStr urlProto urlHostName]} {
-      return 1
-    }
-
-    if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} {
-      return 1
-    }
+  ### Handle redirects of 2 levels
+  if {![urllog_handle_redirect $urlNick $urlHost $urlChan 1 urlProto urlHostName urlStr ustatus uscode ucode udata umeta]} {
+    return 1
   }
 
-  ### Handle 2nd level redirects
-  if {$ucode >= 301 && $ucode <= 303} {
-    set nurlStr $umeta(location)
-    if {![regexp "\[a-z\]+://" $nurlStr]} {
-      if {[string range $nurlStr 0 0] != "/"} {
-        append nurlStr "/"
-      }
-      set nurlStr "${urlProto}://${urlHostName}${nurlStr}"
-    }
-    urllog_log "Redirection #2: $urlStr -> $nurlStr"
-    set urlStr $nurlStr
-
-    if {![urllog_validate_url $urlNick $urlChan urlStr urlProto urlHostName]} {
-      return 1
-    }
-
-    if {![urllog_dorequest $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} {
-      return 1
-    }
+  if {![urllog_handle_redirect $urlNick $urlHost $urlChan 2 urlProto urlHostName urlStr ustatus uscode ucode udata umeta]} {
+    return 1
   }
 
   # Final document
@@ -464,8 +492,9 @@
     }
     return 1
   } else {
-    urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($ucode)"
+    urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($uscode)"
     urllog_log "Error fetching document: status=$ustatus, code=$ucode, scode=$uscode, url=$urlStr"
+    return 0
   }
 }