# HG changeset patch # User Matti Hamalainen # Date 1611741680 -7200 # Node ID 2294b73df2cf289854520ee96cff7dab0cee709a # Parent b07ff5123baf970039d60c36dd01fbe7c8de6277 urllog: Massive refactoring to use similar message system as other scripts, and general cleanups to the code. This breaks configuration backwards compatibility completely. diff -r b07ff5123baf -r 2294b73df2cf config.urllog.example --- a/config.urllog.example Wed Jan 27 10:57:13 2021 +0200 +++ b/config.urllog.example Wed Jan 27 12:01:20 2021 +0200 @@ -5,6 +5,19 @@ ########################################################################## ### +### Bind commands +### +bind pub - !urlfind urllog_cmd_pub_find +bind msg - !urlfind urllog_cmd_msg_find +bind pubm - *.* urllog_check_line +bind topc - *.* urllog_check_line + + +# Name of the command used for help texts +set urllog_cmd_name "!urlfind" + + +### ### General options ### @@ -15,7 +28,11 @@ # Channels where URLLog announces URL titles and so on # By default we set this to same as urllog_log_channels -set urllog_msg_channels $urllog_log_channels +set urllog_pub_channels $urllog_log_channels + +# Channels where !urlfind and other commands can be used. +# By default this is set to be the same as urllog_log_channels +set urllog_search_channels $urllog_log_channels # Filename of the SQLite URL database file @@ -36,6 +53,7 @@ ### This is REQUIRED for document title fetching/storage set urllog_extra_checks 1 + ### Set to 1 if you want "strict" extra checking. This will cause ### ftp URLs not being added, and if TLS support is disabled, https URLs ### will not be added either. You probably don't want this set. @@ -51,10 +69,6 @@ ### Search related settings ### -# Channels where !urlfind and other commands can be used. -# By default this is set to be the same as urllog_log_channels -set urllog_search_channels $urllog_log_channels - # Limit how many URLs should the "!urlfind" command show at most. set urllog_showmax_pub 3 @@ -80,9 +94,6 @@ # 0 = ShortURLs not shown in any bot actions set urllog_shorturl 1 -# Max length of original URL to be shown, rest is chopped -# off if the URL is longer than the specified amount. -set urllog_shorturl_orig 30 # Web server URL that handles redirects of ShortURLs set urllog_shorturl_prefix "https://tnsp.org/u/" @@ -111,24 +122,47 @@ ### ### Message texts (informal, errors, etc.) ### - -# No such host was found -set urlmsg_nosuchhost "ei tommosta oo!" - -# Could not connect host (I/O errors etc) -set urlmsg_ioerror "kraak, virhe yhdynnässä." - -# HTTP timeout occured -set urlmsg_timeout "ei jaksa ootella" +# Max length of original URL to be shown, rest is chopped +# off if the URL is longer than the specified amount. +set urllog_shorturl_max_orig 35 -# No such document was found or other error -set urlmsg_errorgettingdoc "siitosvirhe" - -# URL was already known (was in database) -set urlmsg_alreadyknown "wanha!" -#set urlmsg_alreadyknown "Empiiristen havaintojen perusteella ja tällä sovellutusalueella esiintyneisiin aikaisempiin kontekstuaalisiin ilmaisuihin viitaten uskallan todeta, että sovellukseen ilmoittamasi tietoverkko-osoite oli kronologisti ajatellen varsin postpresentuaalisesti sopimaton ja ennestään hyvin tunnettu." - -# No match was found when searched with !urlfind or other command -set urlmsg_nomatch "Ei osumia." +# Max length of title to be shown on search results +set urllog_title_max 35 +array set urllog_messages { + "err_http_fail" "nyt meni vituix: @3@" + + "err_http_get" "kraak, virhe yhdynnässä: @2@" + + "err_http_status" "voi perse: @2@, @3@" + + "err_url_local_net" "en tykkää #1." + "err_url_invalid_net" "en tykkää #2." + "err_url_proto_class" "en tykkää #3." + "err_url_proto_no_class" "en tykkää #4." + "err_url_invalid_port" "en tykkää #5." + + "err_redirect_invalid" "paska redirecti @5@" + "err_redirect_fail" "redirekti @5@ feilasi: @1@ / @3@" + + "err_charset" "yhyy, merkistö paske! @3@" + + "url_added_short_has_title" "'@1@' (@4@) @5@" + "url_added_short_no_title" "@4@ @5@" + + "url_added_long_has_title" "'@1@' (@4@)" + "url_added_long_no_title" "@3@" + + "url_known_short" "wanha!" + "url_known_long" "wanha!" + + + "search_result_title" "'@2@' " + "search_result_no_title" "" + + "search_result_short" "#@1@: @4@@5@ @6@ (@2@ @ @3@)" + "search_result_long" "#@1@: @4@@5@ (@2@ @ @3@)" + + "search_no_match" "ei osumia." +} diff -r b07ff5123baf -r 2294b73df2cf urllog.tcl --- a/urllog.tcl Wed Jan 27 10:57:13 2021 +0200 +++ b/urllog.tcl Wed Jan 27 12:01:20 2021 +0200 @@ -1,6 +1,6 @@ ########################################################################## # -# URLLog v2.6.0 by Matti 'ccr' Hamalainen +# URLLog v2.7.0 by Matti 'ccr' Hamalainen # (C) Copyright 2000-2021 Tecnic Software productions (TNSP) # # This script is freely distributable under GNU GPL (version 2) license. @@ -12,7 +12,7 @@ # # URL-logger script for EggDrop IRC robot, utilizing SQLite3 database # This script requires SQLite TCL extension. Under Debian, you need: -# tcl8.5 libsqlite3-tcl (and eggdrop eggdrop-data, of course) +# tcl8.5 (or later) libsqlite3-tcl eggdrop eggdrop-data # # If you are doing a fresh install, you will need to create the initial # database with the required table schemas. You can do that by running @@ -37,7 +37,7 @@ package require http set urllog_name "URLLog" -set urllog_version "2.6.0" +set urllog_version "2.7.0" set urllog_message "$urllog_name v$urllog_version (C) 2000-2021 ccr/TNSP" @@ -45,13 +45,6 @@ set urllog_shorturl_str "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" -### Binding initializations -bind pub - !urlfind urllog_pub_cmd_urlfind -bind msg - !urlfind urllog_msg_cmd_urlfind -bind pubm - *.* urllog_check_line -bind topc - *.* urllog_check_line - - #------------------------------------------------------------------------- ### Utility functions proc urllog_log {umsg} { @@ -63,6 +56,37 @@ } +proc urllog_qm {uid} { + global urllog_messages + + if {[info exists urllog_messages($uid)]} { + return $urllog_messages($uid) + } else { + return $uid + } +} + + +proc urllog_smsg {apublic anick achan amsg {aargs {}}} { + global urllog_preferredmsg urllog_cmd_name + set amsg [string map [list "@cmd@" $urllog_cmd_name] $amsg] + utl_msg_args $urllog_preferredmsg $apublic $anick $achan $amsg $aargs +} + + +proc urllog_msg {apublic anick achan aid {aargs {}}} { + urllog_smsg $apublic $anick $achan [urllog_qm $aid] $aargs +} + + +proc urllog_verb_msg {apublic anick achan aid {aargs {}}} { + global urllog_verbose + if {$urllog_verbose != 0} { + urllog_msg $apublic $anick $achan $aid $aargs + } +} + + proc urllog_isnumber {uarg} { foreach i [split $uarg {}] { if {![string match \[0-9\] $i]} { return 0 } @@ -71,22 +95,11 @@ } -proc urllog_msg {apublic anick achan amsg} { - global urllog_preferredmsg - - if {$apublic == 1} { - putserv "$urllog_preferredmsg $achan :$amsg" +proc urllog_is_enabled {uval} { + if {$uval} { + return "ON." } else { - putserv "$urllog_preferredmsg $anick :$amsg" - } -} - - -proc urllog_verb_msg {anick achan amsg} { - global urllog_verbose - - if {$urllog_verbose != 0} { - urllog_msg 1 $anick $achan $amsg + return "OFF." } } @@ -99,7 +112,12 @@ } -#------------------------------------------------------------------------- +proc urllog_get_ss {uindex} { + global urllog_shorturl_str + return [string index $urllog_shorturl_str $uindex] +} + + proc urllog_get_short {utime} { global urllog_shorturl_prefix urllog_shorturl_str @@ -110,87 +128,93 @@ set u2 [expr $utmp / $ulen] set u3 [expr $utmp % $ulen] - return "\[ $urllog_shorturl_prefix[string index $urllog_shorturl_str $u1][string index $urllog_shorturl_str $u2][string index $urllog_shorturl_str $u3] \]" + return "\[ $urllog_shorturl_prefix[urllog_get_ss $u1][urllog_get_ss $u2][urllog_get_ss $u3] \]" } -#------------------------------------------------------------------------- -proc urllog_chop_url {url} { - global urllog_shorturl_orig +proc urllog_chop_url {ustr} { + global urllog_shorturl_max_orig - if {[string length $url] > $urllog_shorturl_orig} { - return "[string range $url 0 $urllog_shorturl_orig]..." + if {[string length $ustr] > $urllog_shorturl_max_orig} { + return "[string range $ustr 0 $urllog_shorturl_max_orig]..." } else { - return $url + return $ustr + } +} + + +proc urllog_chop_title {ustr} { + global urllog_title_max + + if {[string length $ustr] > $urllog_title_max} { + return "[string range $ustr 0 $urllog_title_max]..." + } else { + return $ustr } } #------------------------------------------------------------------------- -proc urllog_exists {urlStr urlNick urlHost urlChan} { - global urldb urlmsg_alreadyknown urllog_shorturl - global urllog_msg_channels - - set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE url='[utl_escape $urlStr]'" - urldb eval $usql { - urllog_log "URL said by $urlNick ($urlStr) already known" - if {$urllog_shorturl != 0} { - set qstr "[urllog_get_short $uid] " - } else { - set qstr "" - } - append qstr "($uuser/$uchan@[utl_ctime $utime])" - if {[string length $utitle] > 0} { - set qstr "$urlmsg_alreadyknown - '$utitle' $qstr" - } else { - set qstr "$urlmsg_alreadyknown $qstr" - } - - if {[utl_match_delim_list $urllog_msg_channels $uchan]} { - urllog_verb_msg $urlNick $urlChan $qstr - } - return 0 - } - return 1 -} +proc urllog_add_url {urlStr urlNick urlHost urlChan urlTitle} { + global urllog_db urllog_shorturl urllog_pub_channels -#------------------------------------------------------------------------- -proc urllog_addurl {urlStr urlNick urlHost urlChan urlTitle} { - global urldb urllog_shorturl + ### Does the URL already exist? + set usql "SELECT id AS uid, utime AS utime, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE url='[utl_escape $urlStr]'" + urllog_db eval $usql { + urllog_log "URL said by $urlNick ($urlStr) already known" + if {[utl_match_delim_list $urllog_pub_channels $uchan]} { + if {$urllog_shorturl != 0} { + urllog_verb_msg 1 $urlNick $urlChan "url_known_short" [list $uuser $uchan $uhost [utl_ctime $utime] $utitle $uurl [urllog_get_short $uid]] + } else { + urllog_verb_msg 1 $urlNick $urlChan "url_known_long" [list $uuser $uchan $uhost [utl_ctime $utime] $utitle $uurl] + } + } + return 1 + } + + ### Validate title if {$urlTitle == ""} { set uins "NULL" } else { set uins "'[utl_escape $urlTitle]'" } + + ### Attempt to insert into database set usql "INSERT INTO urls (utime,url,user,host,chan,title) VALUES ([unixtime], '[utl_escape $urlStr]', '[utl_escape $urlNick]', '[utl_escape $urlHost]', '[utl_escape $urlChan]', $uins)" - if {[catch {urldb eval $usql} uerrmsg]} { + if {[catch {urllog_db eval $usql} uerrmsg]} { urllog_log "$uerrmsg on SQL:\n$usql" return 0 } - set uid [urldb last_insert_rowid] + set uid [urllog_db last_insert_rowid] urllog_log "Added URL ($urlNick@$urlChan): $urlStr" ### Let's say something, to confirm that everything went well. if {$urllog_shorturl != 0} { - set qstr "[urllog_get_short $uid] " + set urlShort [urllog_get_short $uid] + set ushort "short" } else { - set qstr "" - } - if {[string length $urlTitle] > 0} { - urllog_verb_msg $urlNick $urlChan "'$urlTitle' ([urllog_chop_url $urlStr]) $qstr" - } else { - urllog_verb_msg $urlNick $urlChan "[urllog_chop_url $urlStr] $qstr" + set urlShort "" + set ushort "long" } + if {[string length $urlTitle] > 0} { + set umode "url_added_${ushort}_has_title" + } else { + set umode "url_added_${ushort}_no_title" + } + + urllog_verb_msg 1 $urlNick $urlChan $umode [list $urlTitle [urllog_chop_title $urlTitle] $urlStr [urllog_chop_url $urlStr] $urlShort] return 1 } #------------------------------------------------------------------------- proc urllog_clear_request { urlStatus urlSCode urlCode urlData urlMeta } { + + ### Clear the request data upvar $urlStatus ustatus upvar $urlSCode uscode upvar $urlCode ucode @@ -207,7 +231,6 @@ #------------------------------------------------------------------------- proc urllog_do_request { urlNick urlChan urlStr urlStatus urlSCode urlCode urlData urlMeta } { - global urlmsg_ioerror urlmsg_timeout urlmsg_errorgettingdoc upvar $urlStatus ustatus upvar $urlSCode uscode @@ -217,35 +240,32 @@ set urlHeaders {} lappend urlHeaders "Accept-Encoding" "identity" -# lappend urlHeaders "Connection" "keep-alive" + #lappend urlHeaders "Connection" "keep-alive" + ### Perform request if {[catch {set utoken [::http::geturl $urlStr -timeout 6000 -binary 1 -headers $urlHeaders]} uerrmsg]} { - urllog_verb_msg $urlNick $urlChan "$urlmsg_ioerror ($uerrmsg)" + urllog_verb_msg 1 $urlNick $urlChan "err_http_get" [list $urlStr $uerrmsg] urllog_log "HTTP request failed: $uerrmsg" return 0 } + ### Check status set ustatus [::http::status $utoken] - if {$ustatus == "timeout"} { - urllog_verb_msg $urlNick $urlChan "$urlmsg_timeout" - urllog_log "HTTP request timed out ($urlStr)" + set uscode [::http::code $utoken] + set ucode [::http::ncode $utoken] + + if {$ustatus != "ok"} { + urllog_verb_msg 1 $urlNick $urlChan "err_http_status" [list $urlStr $ustatus $uscode $ucode] + urllog_log "Error in HTTP request: $ustatus / $uscode ($urlStr)" return 0 } - if {$ustatus != "ok"} { - urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ([::http::error $utoken])" - urllog_log "Error in HTTP transaction: [::http::error $utoken] ($urlStr)" - return 0 - } - - set ustatus [::http::status $utoken] - set uscode [::http::code $utoken] - set ucode [::http::ncode $utoken] + ### Get data set udata [::http::data $utoken] array set umeta [::http::meta $utoken] ::http::cleanup $utoken - # Sanitize the metadata KEYS + ### Sanitize the metadata KEYS foreach {ukey uvalue} [array get umeta] { set ukey [string tolower $ukey] set umeta($ukey) $uvalue @@ -254,10 +274,11 @@ return 1 } + #------------------------------------------------------------------------- proc urllog_validate_url { urlNick urlChan urlMStr urlMProto urlMHostName } { - global urlmsg_nosuchhost urllog_httprep - global urllog_shorturl_prefix urllog_shorturl + global urllog_httprep urllog_shorturl_prefix urllog_shorturl + upvar $urlMStr urlStr upvar $urlMProto urlProto upvar $urlMHostName urlHostName @@ -267,6 +288,10 @@ set urlStr $urlClean } + if {[regexp {^\[(.+)\]$} $urlStr urlMatch urlClean]} { + set urlStr $urlClean + } + ### Clean excess stuff, if any, and attempt to ### guess the URL protocol component if it is missing if {[regexp "(\[a-z\]+)://\[^ \]+" $urlStr urlMatch urlProto]} { @@ -281,22 +306,24 @@ if {[regexp "(\[a-z\]+)://(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})\\.(\[0-9\]{1,3})" $urlStr urlMatch urlProto ni1 ni2 ni3 ni4]} { # Check if the IP is on local network if {$ni1 == 127 || $ni1 == 10 || ($ni1 == 192 && $ni2 == 168)} { - urllog_log "URL pointing to local network, ignored ($urlStr)." + urllog_verb_msg 1 $urlNick $urlChan "err_url_local_net" [list $urlStr] + urllog_log "URL pointing to local network, ignored (${urlStr})." return 0 } if {$ni1 == 0 || $ni1 >= 255 || $ni2 >= 255 || $ni3 >= 255 || $ni4 >= 255} { - urllog_log "URL pointing to invalid network, ignored ($urlStr)." + urllog_verb_msg 1 $urlNick $urlChan "err_url_invalid_net" [list $urlStr] + urllog_log "URL pointing to invalid network, ignored (${urlStr})." return 0 } } ### Check now if we have an ShortURL here ... - if {[string match "$urllog_shorturl_prefix*" $urlStr]} { - urllog_log "Ignoring ShortURL from $urlNick: $urlStr" + if {[string match "${urllog_shorturl_prefix}*" $urlStr]} { + urllog_log "Ignoring ShortURL: ${urlStr}" # set uud "" # set usql "SELECT id AS uid, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE utime=$uud" -# urldb eval $usql { -# urllog_verb_msg $urlNick $urlChan "'$utitle' - $uurl" +# urllog_db eval $usql { +# urllog_msg 1 $urlNick $urlChan "'$utitle' - $uurl" # return 1 # } return 0 @@ -307,11 +334,13 @@ if {[regexp "(\[a-z\]+)://" $urlStr urlMatch urlProto]} { ### Is it a http or ftp url? if {$urlProto != "http" && $urlProto != "https" && $urlProto != "ftp"} { - urllog_log "Broken URL from $urlNick: ($urlStr) UNSUPPORTED protocol class ($urlProto)." + urllog_verb_msg 1 $urlNick $urlChan "err_url_proto_class" [list $urlStr $urlProto] + urllog_log "Broken URL: ${urlStr} - unsupported protocol class (${urlProto})." return 0 } } else { - urllog_log "Broken URL from $urlNick: ($urlStr), no protocol specifier." + urllog_verb_msg 1 $urlNick $urlChan "err_url_proto_no_class" [list $urlStr] + urllog_log "Broken URL: ${urlStr} - no protocol specifier." return 0 } @@ -320,8 +349,9 @@ set urlHostName [lindex $urlRecord 2] set urlPort [lindex [split $urlHostName ":"] end] - if {![urllog_isnumber $urlPort] && $urlPort != "" && $urlPort != $urlHostName} { - urllog_log "Broken URL from $urlNick: ($urlStr) illegal port $urlPort" + if {$urlPort != "" && ![urllog_isnumber $urlPort] && $urlPort != $urlHostName} { + urllog_verb_msg 1 $urlNick $urlChan "err_url_invalid_port" [list $urlStr $urlPort] + urllog_log "Broken URL: ${urlStr} - illegal or invalid port '${urlPort}'" return 0 } @@ -343,13 +373,16 @@ upvar $urlData udata upvar $urlMeta umeta + ### Was result a redirect? if {$ucode >= 301 && $ucode <= 303} { - if {[llength [array get umeta "location"]] == 0} { - urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc (invalid redirect without Location header)" - urllog_log "Error fetching document: status=$ustatus, code=$ucode, scode=$uscode, url=$ustr : Invalid redirect without Location header (redirLevel=${urlRedirLevel}" + ### Check that we have a location header + if {![info exists umeta(location)]} { + urllog_verb_msg 1 $urlNick $urlChan "err_redirect_invalid" [list $ustr $ustatus $uscode $ucode $urlRedirLevel] + urllog_log "Invalid redirect without location header: status=$ustatus, code=$ucode, scode=$uscode, url=$ustr, redirLevel=$urlRedirLevel" return 0 } + ### Fix up location URI set nustr $umeta(location) if {![regexp "\[a-z\]+://" $nustr]} { if {[string range $nustr 0 0] != "/"} { @@ -358,6 +391,7 @@ set nustr "${uproto}://${uhostname}${nustr}" } + ### Validate the target URI urllog_log "Redirection #${urlRedirLevel}: $ustr -> $nustr" set ustr $nustr @@ -365,9 +399,11 @@ return 0 } + ### Attempt to fetch redirection target urllog_clear_request ustatus uscode ucode udata umeta if {![urllog_do_request $urlNick $urlChan $ustr ustatus uscode ucode udata umeta]} { - urllog_verb_msg $urlNick $urlChan "$urlmsg_errorgettingdoc ($uscode)" + urllog_verb_msg 1 $urlNick $urlChan "err_redirect_fail" [list $ustr $ustatus $uscode $ucode $urlRedirLevel] + urllog_log "Error fetching redirect: status=$ustatus, code=$ucode, scode=$uscode, url=$ustr, redirLevel=$urlRedirLevel" return 0 } } @@ -378,14 +414,9 @@ #------------------------------------------------------------------------- proc urllog_check_url {urlStr urlNick urlHost urlChan} { - global urllog_encoding http_tls_support urlmsg_errorgettingdoc + global urllog_encoding http_tls_support global urllog_extra_checks urllog_extra_strict - ### Does the URL already exist? - if {![urllog_exists $urlStr $urlNick $urlHost $urlChan]} { - return 1 - } - ### Validate URL compoments, etc. set urlProto "" set urlHostName "" @@ -398,7 +429,7 @@ # No optional checks, or it's not http/https. if {$urllog_extra_strict == 0} { # Strict checking disabled, so add the URL, if it does not exist already. - urllog_addurl $urlStr $urlNick $urlHost $urlChan "" + urllog_add_url $urlStr $urlNick $urlHost $urlChan "" return 1 } elseif {$http_tls_support == 0 && $urlProto == "https"} { # Strict ENABLED: If TLS support is disabled and we have https, do nothing @@ -407,107 +438,106 @@ # Strict ENABLED: It's not http, or https return 1 } - } - ### Does the document pointed by the URL exist? - if {![urllog_do_request $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} { - return 1 - } - - ### Handle redirects of 2 levels - if {![urllog_handle_redirect $urlNick $urlHost $urlChan 1 urlProto urlHostName urlStr ustatus uscode ucode udata umeta]} { - return 1 - } - - if {![urllog_handle_redirect $urlNick $urlHost $urlChan 2 urlProto urlHostName urlStr ustatus uscode ucode udata umeta]} { - return 1 - } - - # Final document - if {$ucode >= 200 && $ucode <= 205} { - set uenc_doc "" - set uenc_http "" - set uencoding "" - - # Get information about specified character encodings - if {[info exists umeta(Content-Type)] && [regexp -nocase {charset\s*=\s*([a-z0-9._-]+)} $umeta(content-type) umatches uenc_http]} { - # Found character set encoding information in HTTP headers + } else { + ### Does the document pointed by the URL exist? + if {![urllog_do_request $urlNick $urlChan $urlStr ustatus uscode ucode udata umeta]} { + return 1 } - if {[regexp -nocase -- "" $udata umatches uenc_doc]} { - # Found old style HTML meta tag with character set information - } elseif {[regexp -nocase -- "" $udata umatches uenc_doc]} { - # Found HTML5 style meta tag with character set information + ### Handle redirects of 3 levels + if {![urllog_handle_redirect $urlNick $urlHost $urlChan 1 urlProto urlHostName urlStr ustatus uscode ucode udata umeta]} { + return 1 + } + + if {![urllog_handle_redirect $urlNick $urlHost $urlChan 2 urlProto urlHostName urlStr ustatus uscode ucode udata umeta]} { + return 1 + } + + if {![urllog_handle_redirect $urlNick $urlHost $urlChan 3 urlProto urlHostName urlStr ustatus uscode ucode udata umeta]} { + return 1 } - # Make sanitized versions of the encoding strings - set uenc_http2 [urllog_sanitize_encoding $uenc_http] - set uenc_doc2 [urllog_sanitize_encoding $uenc_doc] + # Final document + if {$ucode >= 200 && $ucode <= 205} { + set uenc_doc "" + set uenc_http "" + set uencoding "" - # Check if the document has specified encoding - # KLUDGE! - set uencoding $uenc_http2 - if {$uencoding == "" && $uenc_doc2 != ""} { - set uencoding $uenc_doc2 - } elseif {$uencoding == ""} { - # If _NO_ known encoding of any kind, assume the default of iso8859-1 - set uencoding "iso8859-1" - } - - urllog_log "Charsets: http='$uenc_http', doc='$uenc_doc' / sanitized http='$uenc_http2', doc='$uenc_doc2' -> '$uencoding'" - - # Get the document title, if any - set urlTitle "" + # Get information about specified character encodings + if {[info exists umeta(content-type)] && [regexp -nocase {charset\s*=\s*([a-z0-9._-]+)} $umeta(content-type) umatches uenc_http]} { + # Found character set encoding information in HTTP headers + } - if {[regexp -nocase -- "" $udata umatches urlTitle]} { - # ... - } elseif {[regexp -nocase -- "(.\*\?)" $udata umatches urlTitle]} { - # ... - } + if {[regexp -nocase -- "" $udata umatches uenc_doc]} { + # Found old style HTML meta tag with character set information + } elseif {[regexp -nocase -- "" $udata umatches uenc_doc]} { + # Found HTML5 style meta tag with character set information + } - # If facebook, get meta info - if {[regexp -nocase -- "(http|https):\/\/www.facebook.com" $urlStr]} { - if {[regexp -nocase -- " '$uencoding'" + + # Get the document title, if any + set urlTitle "" + + if {[regexp -nocase -- "" $udata umatches urlTitle]} { + # ... + } elseif {[regexp -nocase -- "(.\*\?)" $udata umatches urlTitle]} { + # ... + } + + # If facebook, get meta info + if {[regexp -nocase -- "(http|https):\/\/www.facebook.com" $urlStr]} { + if {[regexp -nocase -- " 0} { + set stitle [utl_str_map_values [urllog_qm "search_result_title"] [list $utitle [urllog_chop_title $utitle]]] + } else { + set stitle [urllog_qm "search_result_no_title"] + } + if {$urllog_shorturl != 0 && $uid != ""} { - set shortURL "$shortURL [urllog_get_short $uid]" + urllog_msg $upublic $unick $uchan "search_result_short" [list $nresult $uuser [utl_ctime $utime] $stitle [urllog_chop_url $uurl] [urllog_get_short $uid]] + } else { + urllog_msg $upublic $unick $uchan "search_result_long" [list $nresult $uuser [utl_ctime $utime] $stitle $uurl] } - urllog_msg $upublic $unick $uchan "#$iresults: $shortURL ($uuser@[utl_ctime $utime])" } - if {$iresults == 0} { + if {$nresult == 0} { # If no URLs were found - urllog_msg $upublic $unick $uchan $urlmsg_nomatch + urllog_msg $upublic $unick $uchan "search_no_match" [list $utext] } - return 0 + return 1 } #------------------------------------------------------------------------- ### Finding binded functions -proc urllog_pub_cmd_urlfind {unick uhost uhand uchan utext} { +proc urllog_cmd_pub_find {unick uhost uhand uchan utext} { global urllog_search_channels if {[utl_match_delim_list $urllog_search_channels $uchan]} { - return [urllog_find $unick $uhand $uchan $utext 1] + return [urllog_cmd_find $unick $uhand $uchan $utext 1] } return 0 } -proc urllog_msg_cmd_urlfind {unick uhost uhand utext} { - urllog_find $unick $uhand "" $utext 0 - return 0 +proc urllog_cmd_msg_find {unick uhost uhand utext} { + return [urllog_cmd_find $unick $uhand "" $utext 0] } @@ -613,13 +648,10 @@ ### Miscellaneous init messages -if {$urllog_extra_checks != 0} { - putlog " - Additional URL validity checks enabled." -} - -if {$urllog_verbose != 0} { - putlog " - Verbose mode enabled." -} +putlog " - Log messages [urllog_is_enabled $urllog_log_enable]" +putlog " - Verbose mode [urllog_is_enabled $urllog_verbose]" +putlog " - Additional URL validity checks [urllog_is_enabled $urllog_extra_checks]" +putlog " - Strict checks [urllog_is_enabled $urllog_extra_strict]" ### HTTP module initialization @@ -642,7 +674,7 @@ ### SQLite database initialization -if {[catch {sqlite3 urldb $urllog_db_file} uerrmsg]} { +if {[catch {sqlite3 urllog_db $urllog_db_file} uerrmsg]} { putlog "Could not open SQLite3 database '${urllog_db_file}': ${uerrmsg}" exit 2 }