view fetch_food.tcl @ 424:825cac46b1cb

Cosmetic / stray trailing whitespace cleanup.
author Matti Hamalainen <ccr@tnsp.org>
date Sun, 08 Jan 2017 03:55:55 +0200
parents 599b90e41c03
children
line wrap: on
line source

#!/usr/bin/tclsh
#
# NOTICE! Change above path to correct tclsh binary path!
#
##########################################################################
#
# RuokaLista fetcher v1.0 by Matti 'ccr' Hamalainen <ccr@tnsp.org>
# (C) Copyright 2010-2011 Tecnic Software productions (TNSP)
#
# This script is freely distributable under GNU GPL (version 2) license.
#
##########################################################################

# Path and filename of the data file, MUST BE SAME as in "ruoka.tcl"
set datafile "/home/niinuska/bot/data.ruoka"

# Use a HTTP proxy? (1 = yes, 0 = no)
set http_proxy 0

# HTTP proxy host address and port (only needed if use_proxy = 1)
set http_proxy_host "cache.inet.fi"
set http_proxy_port 800


##########################################################################
set html_ent [split "\n| |\r| |<br />| |&nbsp;| |&#160;| |&raquo;|>>|&quot;|\"|&auml;|ä|&ouml;|ö|&Auml;|Ä|&Ouml;|Ö|&amp;|&|&lt;|<|&gt;|>|ä|ä|ö|ö|Ä|Ä" "|"]

package require http
::http::config -urlencoding iso8859-1 -useragent "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.0) Opera 9.5"
if {$http_proxy != 0} {
  ::http::config -proxyhost $http_proxy_host -proxyport $http_proxy_port
}


proc convert_ent {udata} {
  global html_ent
  return [string map $html_ent $udata]
}


proc add_entry {uname uday udate udesc} {
  global entries
  set utest "$uname:$udate"
  if {[catch {set utmp $entries($utest)}]} {
    set entries($utest) [list $uname $uday $udate $udesc]
  }
}


proc add_amica {datauri dataname} {
  if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
    puts "Error getting $datauri: $uerrmsg"
    return 1
  }

  set upage [::http::data $utoken]
  ::http::cleanup $utoken

  set nmatches 1
  while {$nmatches > 0} {
    set umatches [regexp -nocase -inline -- "<strong>(Maanantai|Tiistai|Keskiviikko|Torstai|Perjantai|Lauantai|Sunnuntai)</strong></td>.?.?<td colspan=\"2\"><strong>(\[^<\]+)</strong></td>(.*)\$" $upage]
    set nmatches [llength $umatches]
    if {$nmatches > 3} {
      set umat [regexp -nocase -inline -- "^(.+?)(<td colspan=\"3\">|</tbody>)" [lindex $umatches 3]]
      set umat [regexp -all -nocase -inline -- "<td colspan=\"\[78\]\">(.\*\?)</td>" [lindex $umat 1]]
      set tmp ""
      foreach {ukey udata} $umat {
        set item [string trim [convert_ent $udata]]
        if {[string length $item] > 0} {
          lappend tmp $item
        }
      }
      add_entry $dataname [lindex $umatches 1] [lindex $umatches 2] [join $tmp "; "]
    }
    set upage [lindex $umatches 3]
  }

  return 0
}

proc add_uniresta {datauri dataname} {
  if {[catch {set utoken [::http::geturl $datauri -binary true -timeout 5000]} uerrmsg]} {
    puts "Error getting $datauri: $uerrmsg"
    return 1
  }

  set upage [::http::data $utoken]
  ::http::cleanup $utoken

  set nmatches 1
  while {$nmatches > 0} {
#    set umatches [regexp -nocase -inline -- "<span class='otsikko'><b>(Maanantai|Tiistai|Keskiviikko|Torstai|Perjantai|Lauantai|Sunnuntai) \[0-9\]+\.\[0-9\]+</b></span><P>(\[^<\]+)</span>(.*)\$" $upage]
    set umatches [regexp -nocase -inline -- "<span class='otsikko'><b>(Maanantai|Tiistai|Keskiviikko|Torstai|Perjantai|Lauantai|Sunnuntai) +\[0-9\]+\.\[0-9\]+</b></span>(.*?)</span>(.*)$" $upage]
    set nmatches [llength $umatches]
    puts "[lindex $umatches 1]"
    if {$nmatches > 3} {
#      set umat [regexp -nocase -inline -- "^(.+?)<br>" [lindex $umatches 3]]
      set umat [regexp -all -nocase -inline -- "(.\*\?)<br>" [lindex $umatches 2]]
      set tmp ""
      foreach {ukey udata} $umat {
        set item [string trim [convert_ent $udata]]
        if {[string length $item] > 0} {
          lappend tmp $item
        }
      }
      add_entry $dataname [lindex $umatches 1] [lindex $umatches 2] [join $tmp "; "]
    }
    set upage [lindex $umatches 3]
  }

  return 0
}


##########################################################################
set kello [clock seconds]
set viikko [expr [clock format $kello -format "%W"] + 1]
set vuosi [clock format $kello -format "%Y"]

# Amica/OAMK tekniikan yksikkö
add_amica "http://www.amica.fi/kotkanpoika" "OAMK"

# Oulun yliopiston Unirestat
set str "http://www.uniresta.fi/2010/ruokalista_tulostettava.php?viikko=$viikko&vuosi=$vuosi&ravintola"

add_uniresta "$str=2"  "Aularavintola"
add_uniresta "$str=3"  "Discus"
add_uniresta "$str=4"  "Julinia"
add_uniresta "$str=5"  "Kastari"
add_uniresta "$str=6"  "Snellmania"
add_uniresta "$str=7"  "Pruxis"
add_uniresta "$str=10" "Vanilla"
add_uniresta "$str=11" "Minttu"


##########################################################################
### Open result datafile and save data
set tmpfname "$datafile.tmp"
if {[catch {set outfile [open $tmpfname w 0600]} uerrmsg]} {
  puts "Error opening $tmpfname for writing: $uerrmsg"
  return 1
}

foreach {ukey udata} [array get entries] {
  puts $outfile [join $udata "½"]
}

close $outfile
if {[catch {file rename -force -- $tmpfname $datafile} uerrmsg]} {
  puts "Error renaming $tmpfname to $datafile: $uerrmsg"
}