Mercurial > hg > egg-tcls
comparison urllog.tcl @ 150:52350ed97775
urllog: Cleanups, rename/move some global variables.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 27 May 2014 07:12:59 +0300 |
parents | 76eefceb2b90 |
children | cbee8ca52eb8 |
comparison
equal
deleted
inserted
replaced
149:fc2654064339 | 150:52350ed97775 |
---|---|
279 } | 279 } |
280 return $utitle | 280 return $utitle |
281 } | 281 } |
282 | 282 |
283 #------------------------------------------------------------------------- | 283 #------------------------------------------------------------------------- |
284 set urllog_shorturl_str "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" | |
285 | |
284 proc urllog_get_short {utime} { | 286 proc urllog_get_short {utime} { |
285 global urllog_shorturl_prefix | 287 global urllog_shorturl_prefix urllog_shorturl_str |
286 | 288 |
287 set ustr "ABCDEFGHIJKLNMOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" | 289 set ulen [string length $urllog_shorturl_str] |
288 set ulen [string length $ustr] | |
289 | 290 |
290 set u1 [expr $utime / ($ulen * $ulen)] | 291 set u1 [expr $utime / ($ulen * $ulen)] |
291 set utmp [expr $utime % ($ulen * $ulen)] | 292 set utmp [expr $utime % ($ulen * $ulen)] |
292 set u2 [expr $utmp / $ulen] | 293 set u2 [expr $utmp / $ulen] |
293 set u3 [expr $utmp % $ulen] | 294 set u3 [expr $utmp % $ulen] |
294 | 295 |
295 return "\[ $urllog_shorturl_prefix[string index $ustr $u1][string index $ustr $u2][string index $ustr $u3] \]" | 296 return "\[ $urllog_shorturl_prefix[string index $urllog_shorturl_str $u1][string index $urllog_shorturl_str $u2][string index $urllog_shorturl_str $u3] \]" |
297 } | |
296 } | 298 } |
297 | 299 |
298 | 300 |
299 #------------------------------------------------------------------------- | 301 #------------------------------------------------------------------------- |
300 proc urllog_chop_url {url} { | 302 proc urllog_chop_url {url} { |
403 # Skip TLD check for URLs with IP address | 405 # Skip TLD check for URLs with IP address |
404 set u_checktld 0 | 406 set u_checktld 0 |
405 } | 407 } |
406 | 408 |
407 ### Check now if we have an ShortURL here ... | 409 ### Check now if we have an ShortURL here ... |
408 if {$urllog_shorturl != 0 && [string match "*$urllog_shorturl_prefix*" $urlStr]} { | 410 if {[string match "$urllog_shorturl_prefix*" $urlStr]} { |
409 urllog_log "Ignoring ShortURL from $urlNick: $urlStr" | 411 urllog_log "Ignoring ShortURL from $urlNick: $urlStr" |
412 set uud "" | |
413 set usql "SELECT id AS uid, url AS uurl, user AS uuser, host AS uhost, chan AS uchan, title AS utitle FROM urls WHERE utime=$uud" | |
414 urldb eval $usql { | |
415 | |
416 } | |
410 return 0 | 417 return 0 |
411 } | 418 } |
412 | 419 |
413 ### Get URL protocol component | 420 ### Get URL protocol component |
414 set u_proto "" | 421 set u_proto "" |
509 # Get information about specified character encodings | 516 # Get information about specified character encodings |
510 if {[info exists umeta(Content-Type)] && [regexp -nocase {charset\s*=\s*([a-z0-9._-]+)} $umeta(Content-Type) umatches uenc_http]} { | 517 if {[info exists umeta(Content-Type)] && [regexp -nocase {charset\s*=\s*([a-z0-9._-]+)} $umeta(Content-Type) umatches uenc_http]} { |
511 # Found character set encoding information in HTTP headers | 518 # Found character set encoding information in HTTP headers |
512 } | 519 } |
513 | 520 |
514 if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/>" $udata umatches uenc_doc]} { | 521 if {[regexp -nocase -- "<meta.\*\?content=\"text/html.\*\?charset=(\[^\"\]*)\".\*\?/\?>" $udata umatches uenc_doc]} { |
515 # Found old style HTML meta tag with character set information | 522 # Found old style HTML meta tag with character set information |
516 } elseif {[regexp -nocase -- "<meta.\*\?charset=\"(\[^\"\]*)\".\*\?/>" $udata umatches uenc_doc]} { | 523 } elseif {[regexp -nocase -- "<meta.\*\?charset=\"(\[^\"\]*)\".\*\?/\?>" $udata umatches uenc_doc]} { |
517 # Found HTML5 style meta tag with character set information | 524 # Found HTML5 style meta tag with character set information |
518 } | 525 } |
519 | 526 |
520 # Make sanitized versions of the encoding strings | 527 # Make sanitized versions of the encoding strings |
521 set uenc_http2 [urllog_sanitize_encoding $uenc_http] | 528 set uenc_http2 [urllog_sanitize_encoding $uenc_http] |
522 set uenc_doc2 [urllog_sanitize_encoding $uenc_doc] | 529 set uenc_doc2 [urllog_sanitize_encoding $uenc_doc] |
523 | 530 |
524 # KLUDGE! | 531 # KLUDGE! |
525 set uencoding $uenc_http2 | 532 set uencoding $uenc_http2 |
533 | |
534 putlog "got charsets : http='$uenc_http', doc='$uenc_doc' / sanitized http='$uenc_http2', doc='$uenc_doc2'" | |
535 | |
526 | 536 |
527 # Check if the document has specified encoding | 537 # Check if the document has specified encoding |
528 if {$uenc_doc != ""} { | 538 if {$uenc_doc != ""} { |
529 # Does it differ from what HTTP says? | 539 # Does it differ from what HTTP says? |
530 if {$uenc_http != "" && $uenc_doc != $uenc_http && $uenc_doc2 != $uenc_http2} { | 540 if {$uenc_http != "" && $uenc_doc != $uenc_http && $uenc_doc2 != $uenc_http2} { |
544 putlog "conversion requested from $uencoding" | 554 putlog "conversion requested from $uencoding" |
545 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} { | 555 if {[catch {set urlTitle [encoding convertfrom $uencoding $urlTitle]} cerrmsg]} { |
546 urllog_log "Error in charset conversion: $cerrmsg" | 556 urllog_log "Error in charset conversion: $cerrmsg" |
547 } | 557 } |
548 } | 558 } |
559 | |
560 # putlog "xxx: $uencoding : '$urlTitle'" | |
561 # return 0 | |
549 | 562 |
550 # Convert some HTML entities to plaintext and do some cleanup | 563 # Convert some HTML entities to plaintext and do some cleanup |
551 set utmp [urllog_convert_ent $urlTitle] | 564 set utmp [urllog_convert_ent $urlTitle] |
552 regsub -all "\r|\n|\t" $utmp " " utmp | 565 regsub -all "\r|\n|\t" $utmp " " utmp |
553 regsub -all " *" $utmp " " utmp | 566 regsub -all " *" $utmp " " utmp |