comparison urllog.tcl @ 150:52350ed97775

urllog: Cleanups, rename/move some global variables.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 27 May 2014 07:12:59 +0300
parents 76eefceb2b90
children cbee8ca52eb8
comparison
equal deleted inserted replaced
149:fc2654064339 150:52350ed97775
279 } 279 }
280 return $utitle 280 return $utitle
281 } 281 }
282 282
283 #------------------------------------------------------------------------- 283 #-------------------------------------------------------------------------
284 set urllog_shorturl_str "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
285
284 proc urllog_get_short {utime} { 286 proc urllog_get_short {utime} {
285 global urllog_shorturl_prefix 287 global urllog_shorturl_prefix urllog_shorturl_str
286 288
287 set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" 289 set ulen [string length $urllog_shorturl_str]
288 set ulen [string length $ustr]
289 290
290 set u1 [expr $utime / ($ulen * $ulen)] 291 set u1 [expr $utime / ($ulen * $ulen)]
291 set utmp [expr $utime % ($ulen * $ulen)] 292 set utmp [expr $utime % ($ulen * $ulen)]
292 set u2 [expr $utmp / $ulen] 293 set u2 [expr $utmp / $ulen]
293 set u3 [expr $utmp % $ulen] 294 set u3 [expr $utmp % $ulen]
294 295
295 return "\[ $urllog_shorturl_prefix[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3] \]" 296 return "\[ $urllog_shorturl_prefix[string index $urllog_shorturl_str $u1][string index $urllog_shorturl_str $u2][string index $urllog_shorturl_str $u3] \]"
297 }
296 } 298 }
297 299
298 300
299 #------------------------------------------------------------------------- 301 #-------------------------------------------------------------------------
300 proc urllog_chop_url {url} { 302 proc urllog_chop_url {url} {
403 # Skip TLD check for URLs with IP address 405 # Skip TLD check for URLs with IP address
404 set u_checktld 0 406 set u_checktld 0
405 } 407 }
406 408
407 ### Check now if we have an ShortURL here ... 409 ### Check now if we have an ShortURL here ...
408 if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} { 410 if {[string match "$urllog_shorturl_prefix*" $urlStr]} {
409 urllog_log "Ignoring ShortURL from $urlNick: $urlStr" 411 urllog_log "Ignoring ShortURL from $urlNick: $urlStr"
412 set uud ""
413 set usql "SELECT id AS uid, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE utime=$uud"
414 urldb eval $usql {
415
416 }
410 return 0 417 return 0
411 } 418 }
412 419
413 ### Get URL protocol component 420 ### Get URL protocol component
414 set u_proto "" 421 set u_proto ""
509 # Get information about specified character encodings 516 # Get information about specified character encodings
510 if {[info exists umeta(Content-Type)] && [regexp -nocase {charset\s*=\s*([a-z0-9._-]+)} $umeta(Content-Type) umatches uenc_http]} { 517 if {[info exists umeta(Content-Type)] && [regexp -nocase {charset\s*=\s*([a-z0-9._-]+)} $umeta(Content-Type) umatches uenc_http]} {
511 # Found character set encoding information in HTTP headers 518 # Found character set encoding information in HTTP headers
512 } 519 }
513 520
514 if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/>" $udata umatches uenc_doc]} { 521 if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/\?>" $udata umatches uenc_doc]} {
515 # Found old style HTML meta tag with character set information 522 # Found old style HTML meta tag with character set information
516 } elseif {[regexp -nocase -- "<meta.\*\?charset=\"(\[^\"\]*)\".\*\?/>" $udata umatches uenc_doc]} { 523 } elseif {[regexp -nocase -- "<meta.\*\?charset=\"(\[^\"\]*)\".\*\?/\?>" $udata umatches uenc_doc]} {
517 # Found HTML5 style meta tag with character set information 524 # Found HTML5 style meta tag with character set information
518 } 525 }
519 526
520 # Make sanitized versions of the encoding strings 527 # Make sanitized versions of the encoding strings
521 set uenc_http2 [urllog_sanitize_encoding $uenc_http] 528 set uenc_http2 [urllog_sanitize_encoding $uenc_http]
522 set uenc_doc2 [urllog_sanitize_encoding $uenc_doc] 529 set uenc_doc2 [urllog_sanitize_encoding $uenc_doc]
523 530
524 # KLUDGE! 531 # KLUDGE!
525 set uencoding $uenc_http2 532 set uencoding $uenc_http2
533
534 putlog "got charsets : http='$uenc_http', doc='$uenc_doc' / sanitized http='$uenc_http2', doc='$uenc_doc2'"
535
526 536
527 # Check if the document has specified encoding 537 # Check if the document has specified encoding
528 if {$uenc_doc != ""} { 538 if {$uenc_doc != ""} {
529 # Does it differ from what HTTP says? 539 # Does it differ from what HTTP says?
530 if {$uenc_http != "" && $uenc_doc != $uenc_http && $uenc_doc2 != $uenc_http2} { 540 if {$uenc_http != "" && $uenc_doc != $uenc_http && $uenc_doc2 != $uenc_http2} {
544 putlog "conversion requested from $uencoding" 554 putlog "conversion requested from $uencoding"
545 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} { 555 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} {
546 urllog_log "Error in charset conversion: $cerrmsg" 556 urllog_log "Error in charset conversion: $cerrmsg"
547 } 557 }
548 } 558 }
559
560 # putlog "xxx: $uencoding : '$urlTitle'"
561 # return 0
549 562
550 # Convert some HTML entities to plaintext and do some cleanup 563 # Convert some HTML entities to plaintext and do some cleanup
551 set utmp [urllog_convert_ent $urlTitle] 564 set utmp [urllog_convert_ent $urlTitle]
552 regsub -all "\r|\n|\t" $utmp " " utmp 565 regsub -all "\r|\n|\t" $utmp " " utmp
553 regsub -all " *" $utmp " " utmp 566 regsub -all " *" $utmp " " utmp