view update.sh @ 34:1f6be8457912

Script output cosmetics.
author Matti Hamalainen <ccr@tnsp.org>
date Wed, 09 Nov 2011 22:22:42 +0200
parents dbe7ff545293
children 4d9354abda73
line wrap: on
line source

#!/bin/sh
# Settings read by parse().

# Suffix appended to each class name when building page URLs; it is also
# interpolated into the link-matching pattern.
URLSUFFIX='.htm'

# Directory (written with a trailing slash) holding the downloaded pages
# and the parsed data files.
CACHEDIR='cache/'

# parse URLPREFIX CLASSFILE PATPREFIX
#
# Refresh the list of class names from the index page at URLPREFIX, then
# download every class page into $CACHEDIR and run parsedata.pl on each page
# that is new or differs from the cached copy.
#
# Arguments:
#   $1 - URL of the index page; also the prefix of every class page URL
#   $2 - file holding the class names, one per line (the raw index page is
#        saved next to it as "$2.tmp")
#   $3 - text preceding the class name in the index links
#        NOTE(review): $3 is interpolated unescaped into the Perl pattern
#        AND used verbatim in the page URL - confirm both uses are intended.
#
# Globals read:    URLSUFFIX, CACHEDIR (INFILE assumes a trailing slash)
# Globals written: URLPREFIX, CLASSFILE, LISTFILE, PATPREFIX, INFILE,
#                  OUTFILE, parse, i ("local" is avoided to stay POSIX sh)
#
# Files per class name X in $CACHEDIR: X.html (current page), X.html.old
# (previous page), X.html.diff (diff output), X.data / X.data.old (parser
# output and its previous version).
parse()
{
	URLPREFIX="$1"
	CLASSFILE="$2"
	LISTFILE="$2.tmp"
	PATPREFIX="$3"

	# Refresh the class list. When the index cannot be fetched, a class
	# file left behind by an earlier run is reused below.
	if wget -q -O "$LISTFILE" "$URLPREFIX"; then
		perl -ne "if (/<a href=\"${PATPREFIX}([A-Z]{3}\d\S+)${URLSUFFIX}\">/) { print \"\$1\n\"; }" < "$LISTFILE" > "$CLASSFILE"
		printf '%s' "* Fetched classfile $CLASSFILE: "
		wc -l < "$CLASSFILE"
	fi

	# Without a class list there is nothing to process.
	if test ! -e "$CLASSFILE"; then
		return 0
	fi

	while read -r i; do
		# Ignore blank lines in the class file.
		test -n "$i" || continue

		parse=no
		INFILE="${CACHEDIR}${i}.html"
		OUTFILE="${CACHEDIR}/$i.data"

		# wget -O creates the output file even when the download fails, so
		# the exit status (and a non-empty result) decides whether new data
		# arrived - the mere existence of the file does not.
		if wget -q -O "$INFILE.new" "${URLPREFIX}${PATPREFIX}${i}${URLSUFFIX}" &&
			test -s "$INFILE.new"; then
			# New data fetched, does old file exist?
			if test -e "$INFILE"; then
				# Yes, do a diff
				if diff -u "$INFILE" "$INFILE.new" > "$INFILE.diff"; then
					# Identical to the cached copy: drop the download.
					rm -f "$INFILE.new"
				else
					# There were differences, do a parse
					parse=yes
					mv "$INFILE" "$INFILE.old"
					mv "$INFILE.new" "$INFILE"
				fi
			else
				# No old file, parse new data
				mv "$INFILE.new" "$INFILE"
				parse=yes
			fi
		else
			# Fetch failed: discard the partial/empty download so it can
			# never replace the cached page.
			rm -f "$INFILE.new"
			# If the parsed data is missing, try to rebuild it from the
			# cached page (when one exists).
			if test ! -e "$OUTFILE"; then
				parse=yes
			fi
		fi

		# Parse only when requested and when there is an input page.
		if test "$parse" = "yes" && test -e "$INFILE"; then
			# Keep the previous parser output as a backup.
			if test -e "$OUTFILE"; then
				mv "$OUTFILE" "$OUTFILE.old"
			fi
			echo "Parsing $i"
			perl parsedata.pl -php "$INFILE" -o "$OUTFILE"
		fi
	done < "$CLASSFILE"
}

# Disabled alternative source, kept for reference:
#parse "http://www.oamk.fi/tyojarjestykset/otek/luokat/" "luokat.txt" ""

# First run: Luhti1 pages, stored in the CACHEDIR that is set before this
# point in the script.
parse 'http://www.oamk.fi/~heikkim/Luhti1/Ryhm%84t/' 'luokat.txt' 'Ryh._'

# Second run: Luhti2 pages go to a separate cache directory. Both runs write
# the same class file, luokat.txt.
# NOTE(review): presumably Luhti2 is the upcoming period - confirm.
CACHEDIR='cache-next/'
parse 'http://www.oamk.fi/~heikkim/Luhti2/Ryhm%84t/' 'luokat.txt' 'Ryh._'