changeset 313:8175ef52889b

urllog: Improve URL title functionality.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 27 Feb 2015 14:38:25 +0200
parents 2371cd2b3f67
children 1cf897164a25
files urllog.tcl
diffstat 1 files changed, 14 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/urllog.tcl	Sat Jan 31 01:15:08 2015 +0200
+++ b/urllog.tcl	Fri Feb 27 14:38:25 2015 +0200
@@ -471,12 +471,20 @@
 
     # Get the document title, if any
     set urlTitle ""
-    if {[regexp -nocase -- "<title>(.\*\?)</title>" $udata umatches urlTitle]} {
-      # If character set conversion is required, do it now
-      if {$uencoding != ""} {
-      	if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} {
-          urllog_log "Error in charset conversion: $cerrmsg"
-        }
+    set tmpRes [regexp -nocase -- "<title.\*\?>(.\*\?)</title>" $udata umatches urlTitle]
+
+    # If facebook, get meta info
+    if {[regexp -nocase -- "(http|https):\/\/www.facebook.com" $urlStr]} {
+      if {[regexp -nocase -- "<meta name=\"description\" content=\"(.\*\?)\"" $udata umatches urlTmp]} {
+        if {$urlTitle != ""} { append urlTitle " :: " }
+        append urlTitle $urlTmp
+      }
+    }
+
+    # If character set conversion is required, do it now
+    if {$urlTitle != "" && $uencoding != ""} {
+      if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} {
+        urllog_log "Error in charset conversion: $cerrmsg"
       }
 
       # Convert some HTML entities to plaintext and do some cleanup