annotate update.sh @ 6:7fca87c41e17

Added data fetching and updating shellscript.
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 11 Jan 2011 21:58:52 +0200
parents
children d0c8dda49726
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
7fca87c41e17 Added data fetching and updating shellscript.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
#!/bin/sh
#
# update.sh - fetch class pages and refresh the parsed data cache.
#
# 1. Downloads the class index from $URLPREFIX and extracts the class IDs
#    (links of the form OR_<ID>.htm) into $CLASSFILE.
# 2. For every class ID, downloads the class page into $CACHEDIR and runs
#    parsedata.pl on it when the page is new, has changed, or when no
#    parsed data file exists yet.
#
# Requires wget, perl and cmp in PATH, and parsedata.pl in the current
# working directory. All file paths below are relative to that directory.

URLPREFIX="http://www.oamk.fi/tyojarjestykset/otek/luokat/"
URLSUFFIX=".htm"
CACHEDIR="cache/"
LISTFILE="luokat.tmp"
CLASSFILE="luokat.txt"

# Refresh $CLASSFILE from the remote index page.
# Best effort: if the download fails, the previous $CLASSFILE (if any) is
# left untouched so the per-class update can still run from the old list.
update_class_list() {
  if wget -q -O "$LISTFILE" "$URLPREFIX"; then
    perl -ne 'if (/<a href="OR_([A-Z]{3}\d\S+)\.htm">/) { print "$1\n"; }' \
      < "$LISTFILE" > "$CLASSFILE"
  fi
}

# Fetch one class page and (re)generate its data file when needed.
# Arguments: $1 - class ID as listed in $CLASSFILE
# Files:     ${CACHEDIR}<ID>.html  cached copy of the class page
#            ${CACHEDIR}<ID>.data  output of parsedata.pl
# Returns:   status of the last command run (parsedata.pl when parsing).
update_class() {
  # POSIX sh has no "local"; the uc_ prefix keeps these out of the way.
  uc_infile="${CACHEDIR}${1}.html"
  uc_datafile="${CACHEDIR}${1}.data"
  uc_parse=no

  # "wget -O" creates the output file even when the download fails, so the
  # mere existence of the .new file proves nothing: check the exit status
  # and make sure something was actually written.
  if wget -q -O "$uc_infile.new" "${URLPREFIX}OR_${1}${URLSUFFIX}" &&
     test -s "$uc_infile.new"; then
    if test -e "$uc_infile" && cmp -s "$uc_infile" "$uc_infile.new"; then
      # Page is unchanged; discard the fresh copy.
      rm -f "$uc_infile.new"
    else
      # No cached page yet, or its contents changed: install and parse.
      mv "$uc_infile.new" "$uc_infile" && uc_parse=yes
    fi
  else
    # Fetch failed; never let a partial/empty download replace the cache.
    rm -f "$uc_infile.new"
  fi

  # Whatever happened above, a missing data file means we should try to
  # build it from the cached page (if there is one).
  if test ! -e "$uc_datafile"; then
    uc_parse=yes
  fi

  if test "$uc_parse" = yes && test -e "$uc_infile"; then
    perl parsedata.pl -php "$uc_infile" -o "$uc_datafile"
  fi
}

# Entry point: refresh the class list, then update every listed class.
# Returns non-zero when the cache directory cannot be created or no class
# list is available.
main() {
  mkdir -p "$CACHEDIR" || return 1

  update_class_list

  if test ! -r "$CLASSFILE"; then
    printf '%s: no class list available (%s)\n' "${0##*/}" "$CLASSFILE" >&2
    return 1
  fi

  while IFS= read -r class_id; do
    test -n "$class_id" || continue
    # Keep the loop's input away from the commands run per class.
    update_class "$class_id" < /dev/null
  done < "$CLASSFILE"
}

main "$@"