Mercurial > hg > egg-tcls
comparison fetch_feeds.tcl @ 656:7192d94f8c28
fetch_feeds: Copy improved HTTP request code from urllog script.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Fri, 19 Feb 2021 19:30:30 +0200 |
parents | 4b985abf5aba |
children | f46c152183a2 |
comparison
equal
deleted
inserted
replaced
655:4b985abf5aba | 656:7192d94f8c28 |
---|---|
44 upvar $urlSCode uscode | 44 upvar $urlSCode uscode |
45 upvar $urlCode ucode | 45 upvar $urlCode ucode |
46 upvar $urlData udata | 46 upvar $urlData udata |
47 upvar $urlMeta umeta | 47 upvar $urlMeta umeta |
48 | 48 |
49 if {[catch {set utoken [::http::geturl $urlStr -timeout 6000 -binary 1 -headers {Accept-Encoding identity}]} uerrmsg]} { | 49 set urlHeaders {} |
50 lappend urlHeaders "Accept-Encoding" "identity" | |
51 #lappend urlHeaders "Connection" "keep-alive" | |
52 | |
53 ### Perform request | |
54 if {[catch {set utoken [::http::geturl $urlStr -timeout 6000 -binary 1 -headers $urlHeaders]} uerrmsg]} { | |
50 puts "HTTP request failed: $uerrmsg" | 55 puts "HTTP request failed: $uerrmsg" |
51 return 0 | 56 return 0 |
52 } | 57 } |
53 | 58 |
54 set ustatus [::http::status $utoken] | 59 ### Check status |
55 if {$ustatus == "timeout"} { | |
56 puts "HTTP request timed out ($urlStr)" | |
57 return 0 | |
58 } | |
59 | |
60 if {$ustatus != "ok"} { | |
61 puts "Error in HTTP transaction: [::http::error $utoken] ($urlStr)" | |
62 return 0 | |
63 } | |
64 | |
65 set ustatus [::http::status $utoken] | 60 set ustatus [::http::status $utoken] |
66 set uscode [::http::code $utoken] | 61 set uscode [::http::code $utoken] |
67 set ucode [::http::ncode $utoken] | 62 set ucode [::http::ncode $utoken] |
63 | |
64 if {$ustatus != "ok"} { | |
65 puts "Error in HTTP request: $ustatus / $uscode ($urlStr)" | |
66 return 0 | |
67 } | |
68 | |
69 ### Get data | |
68 set udata [::http::data $utoken] | 70 set udata [::http::data $utoken] |
69 array set umeta [::http::meta $utoken] | 71 array set umeta [::http::meta $utoken] |
70 ::http::cleanup $utoken | 72 ::http::cleanup $utoken |
71 | 73 |
72 return 1 | 74 ### Sanitize the metadata KEYS |
75 foreach {ukey uvalue} [array get umeta] { | |
76 set ukey [string tolower $ukey] | |
77 set umeta($ukey) $uvalue | |
78 } | |
79 | |
80 ### Perform encoding conversion if necessary | |
81 if {$ucode >= 200 && $ucode <= 205} { | |
82 set uenc_doc "" | |
83 set uenc_http "" | |
84 set uencoding "" | |
85 | |
86 if {[info exists umeta(content-type)] && [regexp -nocase {charset\s*=\s*([a-z0-9._-]+)} $umeta(content-type) -> uenc_http]} { | |
87 # Found character set encoding information in HTTP headers | |
88 } | |
89 | |
90 if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/\?>" $udata -> uenc_doc]} { | |
91 # Found old style HTML meta tag with character set information | |
92 } elseif {[regexp -nocase -- "<meta.\*\?charset=\"(\[^\"\]*)\".\*\?/\?>" $udata -> uenc_doc]} { | |
93 # Found HTML5 style meta tag with character set information | |
94 } | |
95 | |
96 # Make sanitized versions of the encoding strings | |
97 set uenc_http2 [fetch_sanitize_encoding $uenc_http] | |
98 set uenc_doc2 [fetch_sanitize_encoding $uenc_doc] | |
99 | |
100 # Check if the document has specified encoding | |
101 set uencoding $uenc_http2 | |
102 if {$uencoding == "" && $uenc_doc2 != ""} { | |
103 set uencoding $uenc_doc2 | |
104 } elseif {$uencoding == ""} { | |
105 # If _NO_ known encoding of any kind, assume the default of iso8859-1 | |
106 set uencoding "iso8859-1" | |
107 } | |
108 | |
109 #puts "Charsets: http='$uenc_http', doc='$uenc_doc' / sanitized http='$uenc_http2', doc='$uenc_doc2' -> '$uencoding'" | |
110 | |
111 # Get the document title, if any | |
112 if {$uencoding != ""} { | |
113 if {[catch {set udata [encoding convertfrom $uencoding $udata]} cerrmsg]} { | |
114 puts "Error in charset conversion: $urlStr: $cerrmsg" | |
115 return 0 | |
116 } | |
117 } | |
118 return 1 | |
119 } else { | |
120 return 0 | |
121 } | |
73 } | 122 } |
74 | 123 |
75 | 124 |
76 proc add_entry {uname uprefix uurl utitle} { | 125 proc add_entry {uname uprefix uurl utitle} { |
77 global currclock feeds_db nitems | 126 global currclock feeds_db nitems |