Mercurial > hg > egg-tcls
changeset 313:8175ef52889b
urllog: Improve URL title functionality.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Fri, 27 Feb 2015 14:38:25 +0200 |
parents | 2371cd2b3f67 |
children | 1cf897164a25 |
files | urllog.tcl |
diffstat | 1 files changed, 14 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/urllog.tcl	Sat Jan 31 01:15:08 2015 +0200
+++ b/urllog.tcl	Fri Feb 27 14:38:25 2015 +0200
@@ -471,12 +471,20 @@
 
   # Get the document title, if any
   set urlTitle ""
-  if {[regexp -nocase -- "<title>(.\*\?)</title>" $udata umatches urlTitle]} {
-    # If character set conversion is required, do it now
-    if {$uencoding != ""} {
-      if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} {
-        urllog_log "Error in charset conversion: $cerrmsg"
-      }
+  set tmpRes [regexp -nocase -- "<title.\*\?>(.\*\?)</title>" $udata umatches urlTitle]
+
+  # If facebook, get meta info
+  if {[regexp -nocase -- "(http|https):\/\/www.facebook.com" $urlStr]} {
+    if {[regexp -nocase -- "<meta name=\"description\" content=\"(.\*\?)\"" $udata umatches urlTmp]} {
+      if {$urlTitle != ""} { append urlTitle " :: " }
+      append urlTitle $urlTmp
+    }
+  }
+
+  # If character set conversion is required, do it now
+  if {$urlTitle != "" && $uencoding != ""} {
+    if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} {
+      urllog_log "Error in charset conversion: $cerrmsg"
     }
 
     # Convert some HTML entities to plaintext and do some cleanup
```