Mercurial > hg > egg-tcls
changeset 291:54d34d086b47
urllog: Use the utility lib for entity conversion.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 26 Jan 2015 14:01:07 +0200 |
parents | cad1041b5bc4 |
children | 9f90d6918626 |
files | urllog.tcl |
diffstat | 1 files changed, 4 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/urllog.tcl Mon Jan 26 14:00:21 2015 +0200 +++ b/urllog.tcl Mon Jan 26 14:01:07 2015 +0200 @@ -28,6 +28,9 @@ ### you wish. See "config.urllog.example" for an example config file. source [file dirname [info script]]/config.urllog +### Required utillib.tcl +source [file dirname [info script]]/utillib.tcl + ########################################################################## # No need to look below this line @@ -39,15 +42,6 @@ set urllog_httprep [split "\@|%40|{|%7B|}|%7D|\[|%5B|\]|%5D" "|"] -set urllog_ent_str "-|-|'|'|—|-|‏||—|-|–|--|‪||‬|" -append urllog_ent_str "|‎||å|å|Å|Å|é|é|:|:| | " -append urllog_ent_str "|”|\"|“|\"|«|<<|»|>>|"|\"" -append urllog_ent_str "|ä|ä|ö|ö|Ä|Ä|Ö|Ö|&|&|<|<|>|>" -append urllog_ent_str "|ä|ä|å|ö|—|-|'|'|–|-|"|\"" -append urllog_ent_str "|||-|’|'|ü|ü|Ü|Ü|•|*|€|€" -append urllog_ent_str "|”|\"|‘|'" -set urllog_html_ent [split [encoding convertfrom "utf-8" $urllog_ent_str] "|"] - ### Require packages package require sqlite3 package require http @@ -144,17 +138,6 @@ } -proc urllog_convert_ent {udata} { - global urllog_html_ent - return [string map -nocase $urllog_html_ent [string map $urllog_html_ent $udata]] -} - - -proc urllog_escape { str } { - return [string map {' ''} $str] -} - - proc urllog_sanitize_encoding {uencoding} { regsub -- "^\[a-z\]\[a-z\]_\[A-Z\]\[A-Z\]\." $uencoding "" uencoding set uencoding [string tolower $uencoding] @@ -491,7 +474,7 @@ } # Convert some HTML entities to plaintext and do some cleanup - set utmp [urllog_convert_ent $urlTitle] + set utmp [utl_convert_html_ent $urlTitle] regsub -all "\r|\n|\t" $utmp " " utmp regsub -all " *" $utmp " " utmp set urlTitle [string trim $utmp]