view update.sh @ 21:d0c8dda49726

Update script diff checking was not working, fixed.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 14 Jan 2011 20:35:21 +0200
parents 7fca87c41e17
children f025d36926a1
line wrap: on
line source

#!/bin/sh
#
# Refresh the local cache of class pages: fetch the class index, then
# fetch each class page and run the parser on it when needed.
#

# Base URL of the class pages; fetched as-is to obtain the index.
URLPREFIX='http://www.oamk.fi/tyojarjestykset/otek/luokat/'

# Extension appended to each class page name when building its URL.
URLSUFFIX='.htm'

# Prefix for cached pages and parsed data. It is concatenated directly
# with the class name, so the trailing slash is required.
CACHEDIR='cache/'

# Scratch file that receives the downloaded index page.
LISTFILE='luokat.tmp'

# Class names extracted from the index, one per line.
CLASSFILE='luokat.txt'


# Download the class index and extract the class names from its links.
# The names are written to a temporary file first and only installed as
# "$CLASSFILE" when at least one name was found, so that an error page or
# a changed page layout cannot wipe out the previous, still usable list.
if wget -q -O "$LISTFILE" "$URLPREFIX"; then
	if perl -ne 'if (/<a href="OR_([A-Z]{3}\d\S+)\.htm">/) { print "$1\n"; }' < "$LISTFILE" > "$CLASSFILE.new" \
		&& test -s "$CLASSFILE.new"; then
		mv "$CLASSFILE.new" "$CLASSFILE"
	else
		rm -f "$CLASSFILE.new"
	fi
fi
# wget -O creates the scratch file even when the download fails; it is
# not needed once the names have been extracted.
rm -f "$LISTFILE"


# Refresh a single class: download its page, replace the cached copy when
# the content changed, and run the parser when the page changed or the
# parsed data file is missing.
#   $1            - class name, as listed in "$CLASSFILE"
#   Globals read  - CACHEDIR, URLPREFIX, URLSUFFIX
update_class() {
	cls="$1"
	infile="${CACHEDIR}${cls}.html"
	datafile="${CACHEDIR}/${cls}.data"
	parse=no

	# wget -O creates (and truncates) the output file even when the
	# download fails, so the mere existence of "$infile.new" proves
	# nothing. Require a successful exit status and a non-empty file
	# before letting it replace the cached copy.
	if wget -q -O "$infile.new" "${URLPREFIX}OR_${cls}${URLSUFFIX}" \
		&& test -s "$infile.new"; then
		if test -e "$infile" && cmp -s "$infile" "$infile.new"; then
			# Unchanged: keep the cached copy, drop the download.
			rm -f "$infile.new"
		else
			# First fetch, or the content differs: install and parse.
			mv "$infile.new" "$infile"
			parse=yes
		fi
	else
		# Download failed: discard whatever wget left behind.
		rm -f "$infile.new"
	fi

	# Parse as well when the parsed output is missing. The check uses
	# the same path the parser writes to.
	if test ! -e "$datafile"; then
		parse=yes
	fi

	# Parsing is only possible when a cached page exists.
	if test "$parse" = yes && test -e "$infile"; then
		perl parsedata.pl -php "$infile" -o "$datafile"
	fi
}

# Process every class named in the class list, one name per line.
if test -r "$CLASSFILE"; then
	mkdir -p "$CACHEDIR"
	while read -r i; do
		# Skip blank lines instead of fetching a nameless page.
		if test -n "$i"; then
			update_class "$i"
		fi
	done < "$CLASSFILE"
else
	echo "update.sh: cannot read class list '$CLASSFILE'" >&2
fi