comparison fetch_feeds.tcl @ 656:7192d94f8c28

fetch_feeds: Copy improved HTTP request code from urllog script.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 19 Feb 2021 19:30:30 +0200
parents 4b985abf5aba
children f46c152183a2
comparison
equal deleted inserted replaced
655:4b985abf5aba 656:7192d94f8c28
44 upvar $urlSCode uscode 44 upvar $urlSCode uscode
45 upvar $urlCode ucode 45 upvar $urlCode ucode
46 upvar $urlData udata 46 upvar $urlData udata
47 upvar $urlMeta umeta 47 upvar $urlMeta umeta
48 48
49 if {[catch {set utoken [::http::geturl $urlStr -timeout 6000 -binary 1 -headers {Accept-Encoding identity}]} uerrmsg]} { 49 set urlHeaders {}
50 lappend urlHeaders "Accept-Encoding" "identity"
51 #lappend urlHeaders "Connection" "keep-alive"
52
53 ### Perform request
54 if {[catch {set utoken [::http::geturl $urlStr -timeout 6000 -binary 1 -headers $urlHeaders]} uerrmsg]} {
50 puts "HTTP request failed: $uerrmsg" 55 puts "HTTP request failed: $uerrmsg"
51 return 0 56 return 0
52 } 57 }
53 58
54 set ustatus [::http::status $utoken] 59 ### Check status
55 if {$ustatus == "timeout"} {
56 puts "HTTP request timed out ($urlStr)"
57 return 0
58 }
59
60 if {$ustatus != "ok"} {
61 puts "Error in HTTP transaction: [::http::error $utoken] ($urlStr)"
62 return 0
63 }
64
65 set ustatus [::http::status $utoken] 60 set ustatus [::http::status $utoken]
66 set uscode [::http::code $utoken] 61 set uscode [::http::code $utoken]
67 set ucode [::http::ncode $utoken] 62 set ucode [::http::ncode $utoken]
63
64 if {$ustatus != "ok"} {
65 puts "Error in HTTP request: $ustatus / $uscode ($urlStr)"
66 return 0
67 }
68
69 ### Get data
68 set udata [::http::data $utoken] 70 set udata [::http::data $utoken]
69 array set umeta [::http::meta $utoken] 71 array set umeta [::http::meta $utoken]
70 ::http::cleanup $utoken 72 ::http::cleanup $utoken
71 73
72 return 1 74 ### Sanitize the metadata KEYS
75 foreach {ukey uvalue} [array get umeta] {
76 set ukey [string tolower $ukey]
77 set umeta($ukey) $uvalue
78 }
79
80 ### Perform encoding conversion if necessary
81 if {$ucode >= 200 && $ucode <= 205} {
82 set uenc_doc ""
83 set uenc_http ""
84 set uencoding ""
85
86 if {[info exists umeta(content-type)] && [regexp -nocase {charset\s*=\s*([a-z0-9._-]+)} $umeta(content-type) -> uenc_http]} {
87 # Found character set encoding information in HTTP headers
88 }
89
90 if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/\?>" $udata -> uenc_doc]} {
91 # Found old style HTML meta tag with character set information
92 } elseif {[regexp -nocase -- "<meta.\*\?charset=\"(\[^\"\]*)\".\*\?/\?>" $udata -> uenc_doc]} {
93 # Found HTML5 style meta tag with character set information
94 }
95
96 # Make sanitized versions of the encoding strings
97 set uenc_http2 [fetch_sanitize_encoding $uenc_http]
98 set uenc_doc2 [fetch_sanitize_encoding $uenc_doc]
99
100 # Check if the document has specified encoding
101 set uencoding $uenc_http2
102 if {$uencoding == "" && $uenc_doc2 != ""} {
103 set uencoding $uenc_doc2
104 } elseif {$uencoding == ""} {
105 # If _NO_ known encoding of any kind, assume the default of iso8859-1
106 set uencoding "iso8859-1"
107 }
108
109 #puts "Charsets: http='$uenc_http', doc='$uenc_doc' / sanitized http='$uenc_http2', doc='$uenc_doc2' -> '$uencoding'"
110
111 # Get the document title, if any
112 if {$uencoding != ""} {
113 if {[catch {set udata [encoding convertfrom $uencoding $udata]} cerrmsg]} {
114 puts "Error in charset conversion: $urlStr: $cerrmsg"
115 return 0
116 }
117 }
118 return 1
119 } else {
120 return 0
121 }
73 } 122 }
74 123
75 124
76 proc add_entry {uname uprefix uurl utitle} { 125 proc add_entry {uname uprefix uurl utitle} {
77 global currclock feeds_db nitems 126 global currclock feeds_db nitems