Mercurial > hg > lukkari
view update.sh @ 46:9b5fa0f3812b
Improvements in the update logic.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sat, 13 Oct 2012 11:58:26 +0300 |
parents | 76900cdfc066 |
children | ceca06576918 |
line wrap: on
line source
#!/bin/sh
#
# Download the class index pages, mirror every linked per-class page into a
# local cache directory and (re)run parsedata.pl for each page that is new,
# has changed since the previous run, or has no parsed data file yet.

# Suffix shared by the per-class links in the index page and their URLs.
URLSUFFIX=".htm"

#######################################
# Refresh one cache directory from one index page.
# Globals:
#   URLSUFFIX    (read) suffix of the per-class links / URLs
#   CACHEDIR     (read) directory for current pages and data files; it is
#                used as a plain string prefix, so it needs its trailing "/"
#   OLDCACHEDIR  (read) directory receiving the previous page / data file
#                when a newer one replaces it; same trailing "/" rule
# Arguments:
#   $1 - URL of the index page; also the prefix of every per-class URL
#   $2 - file that receives the extracted class names, one per line
#        ("$2.tmp" holds the raw index page)
#   $3 - prefix of the link names in the index page; it is interpolated
#        into the Perl regex as-is, so regex metacharacters stay active
# Outputs:
#   progress messages on stdout
# Returns:
#   1 if the cache directories cannot be created
#######################################
parse() {
  URLPREFIX="$1"
  CLASSFILE="$2"
  LISTFILE="$2.tmp"
  PATPREFIX="$3"

  # mkdir -p is a no-op for directories that already exist.
  mkdir -p "${OLDCACHEDIR}" "${CACHEDIR}" || return 1

  # Refresh the class list. If the download fails, a class list left over
  # from an earlier run (if any) is still used below.
  if wget -q -O "$LISTFILE" "$URLPREFIX"; then
    perl -ne "if (/<a href=\"${PATPREFIX}([A-Z]{3}\d\S+)${URLSUFFIX}\">/) { print \"\$1\n\"; }" < "$LISTFILE" > "$CLASSFILE"
    printf '* Fetched classfile %s: ' "$CLASSFILE"
    wc -l < "$CLASSFILE"
  fi

  if test ! -e "$CLASSFILE"; then
    return 0
  fi

  while IFS= read -r i; do
    need_parse=no
    INFILE="${CACHEDIR}${i}.html"
    ONFILE="${OLDCACHEDIR}${i}.html"
    DATAFILE="${i}.data"

    # wget -O creates/truncates its output file before the transfer starts,
    # so the mere existence of the .new file says nothing about success.
    # Require a zero exit status and a non-empty file; otherwise a failed
    # fetch would replace a good cached page with an empty one.
    if wget -q -O "${INFILE}.new" "${URLPREFIX}${PATPREFIX}${i}${URLSUFFIX}" \
      && test -s "${INFILE}.new"; then
      if test -e "$INFILE"; then
        # A cached page exists: compare it with the fresh download.
        if ! diff -u "$INFILE" "$INFILE.new" > "$INFILE.diff"; then
          # Page changed: keep the previous copy in the old cache, install
          # the new one and re-parse it. The .diff file is left in place.
          need_parse=yes
          mv "$INFILE" "$ONFILE" \
            && mv "$INFILE.new" "$INFILE"
        else
          # Unchanged: discard the download and the empty diff.
          rm -f "$INFILE.new" "$INFILE.diff"
        fi
      else
        # First time this page is seen.
        mv "$INFILE.new" "$INFILE"
        need_parse=yes
      fi
    else
      # Download failed: drop the empty/partial file, keep the cached page.
      rm -f "${INFILE}.new"
    fi

    # Even without a fresh download, parse the cached page if its data file
    # is missing.
    if test ! -e "${CACHEDIR}${DATAFILE}"; then
      need_parse=yes
    fi

    if [ "$need_parse" = "yes" ] && [ -e "$INFILE" ]; then
      # Keep the previous data file in the old cache before overwriting it.
      if test -e "${CACHEDIR}${DATAFILE}"; then
        mv "${CACHEDIR}${DATAFILE}" "${OLDCACHEDIR}${DATAFILE}"
      fi
      printf 'Parsing %s\n' "$i"
      perl parsedata.pl -php "$INFILE" -o "${CACHEDIR}${DATAFILE}"
    fi
  done < "$CLASSFILE"
}

CACHEDIR="cache/"
OLDCACHEDIR="cache-old/"
parse "http://www.oamk.fi/tyojarjestykset/otek/luokat/" "luokat.txt" "OR_"
#parse "http://www.oamk.fi/~heikkim/riihi2/Oppilaat/" "luokat.txt" "Ryh._"

CACHEDIR="cache-next/"
OLDCACHEDIR="cache-next-old/"
parse "http://www.oamk.fi/~heikkim/riihi2/Oppilaat/" "luokat_next.txt" "Ryh._"