comparison update.sh @ 46:9b5fa0f3812b

Improvements in the update logic.
author Matti Hamalainen <ccr@tnsp.org>
date Sat, 13 Oct 2012 11:58:26 +0300
parents 76900cdfc066
children ceca06576918
comparison
equal deleted inserted replaced
45:d8a1e85b8dda 46:9b5fa0f3812b
1 #!/bin/sh 1 #!/bin/sh
2 URLSUFFIX=".htm" 2 URLSUFFIX=".htm"
3 3
4 function parse() 4 function parse()
5 { 5 {
6 # Create cache directories, if they do not exist
7 if test ! -d "${OLDCACHEDIR}"; then
8 mkdir -p "${OLDCACHEDIR}"
9 fi
10
11 if test ! -d "${CACHEDIR}"; then
12 mkdir -p "${CACHEDIR}"
13 fi
14
6 URLPREFIX="$1" 15 URLPREFIX="$1"
7 CLASSFILE="$2" 16 CLASSFILE="$2"
8 LISTFILE="$2.tmp" 17 LISTFILE="$2.tmp"
9 PATPREFIX="$3" 18 PATPREFIX="$3"
10 19
16 25
17 if test -e "$CLASSFILE"; then 26 if test -e "$CLASSFILE"; then
18 cat "$CLASSFILE" | while read i; do 27 cat "$CLASSFILE" | while read i; do
19 parse=no 28 parse=no
20 INFILE="${CACHEDIR}${i}.html" 29 INFILE="${CACHEDIR}${i}.html"
21 wget -q -O "$INFILE.new" "${URLPREFIX}${PATPREFIX}${i}${URLSUFFIX}" 30 ONFILE="${OLDCACHEDIR}${i}.html"
31 DATAFILE="${i}.data"
32 wget -q -O "${INFILE}.new" "${URLPREFIX}${PATPREFIX}${i}${URLSUFFIX}"
22 33
23 if test -e "$INFILE.new"; then 34 if test -e "${INFILE}.new"; then
24 # New data fetched, does old file exist? 35 # New data fetched, does old file exist?
25 if test -e "$INFILE"; then 36 if test -e "$INFILE"; then
26 # Yes, do a diff 37 # Yes, do a diff
27 if ! diff -u "$INFILE" "$INFILE.new" > "$INFILE.diff"; then 38 if ! diff -u "$INFILE" "$INFILE.new" > "$INFILE.diff"; then
28 # There were differences, do a parse 39 # There were differences, do a parse
29 parse=yes 40 parse=yes
30 mv "$INFILE" "$INFILE.old" 41 mv "$INFILE" "$ONFILE" && \
31 mv "$INFILE.new" "$INFILE" 42 mv "$INFILE.new" "$INFILE"
43 else
44 # No changes, apparently .. remove the new one
45 rm -f "$INFILE.new" "$INFILE.diff"
32 fi 46 fi
33 else 47 else
34 # No old file, parse new data 48 # No old file, parse new data
35 mv "$INFILE.new" "$INFILE" 49 mv "$INFILE.new" "$INFILE"
36 parse=yes 50 parse=yes
37 fi 51 fi
38 else 52 fi
39 # No new file fetched, does datafile exist? 53 # No new file fetched, does datafile exist?
40 if test ! -e "$i.data"; then 54 if test ! -e "${CACHEDIR}${DATAFILE}"; then
41 # No, try to parse it if old file input exists 55 # No, try to parse it if old file input exists
42 parse=yes 56 parse=yes
43 fi
44 fi 57 fi
45 58
46 # Parsing of old data requested? 59 # Parsing of old data requested?
47 if test "x$parse" = "xyes" -a -e "$INFILE"; then 60 if test "x$parse" = "xyes" -a -e "$INFILE"; then
48 OUTFILE="${CACHEDIR}/$i.data" 61 if test -e "${CACHEDIR}${DATAFILE}"; then
49 if test -e "$OUTFILE"; then 62 mv "${CACHEDIR}${DATAFILE}" "${OLDCACHEDIR}${DATAFILE}"
50 mv "$OUTFILE" "$OUTFILE.old"
51 fi 63 fi
52 echo "Parsing $i" 64 echo "Parsing $i"
53 perl parsedata.pl -php "$INFILE" -o "$OUTFILE" 65 perl parsedata.pl -php "$INFILE" -o "${CACHEDIR}${DATAFILE}"
54 fi 66 fi
55 done 67 done
56 fi 68 fi
57 } 69 }
58 70
59 CACHEDIR="cache/" 71 CACHEDIR="cache/"
60 #parse "http://www.oamk.fi/tyojarjestykset/otek/luokat/" "luokat.txt" "OR_" 72 OLDCACHEDIR="cache-old/"
61 parse "http://www.oamk.fi/~heikkim/riihi1/luokat/" "luokat.txt" "Ryh._" 73 parse "http://www.oamk.fi/tyojarjestykset/otek/luokat/" "luokat.txt" "OR_"
74 #parse "http://www.oamk.fi/~heikkim/riihi2/Oppilaat/" "luokat.txt" "Ryh._"
62 75
63 CACHEDIR="cache-next/" 76 CACHEDIR="cache-next/"
64 parse "http://www.oamk.fi/~heikkim/riihi2/luokat/" "luokat_next.txt" "Ryh._" 77 OLDCACHEDIR="cache-next-old/"
78 parse "http://www.oamk.fi/~heikkim/riihi2/Oppilaat/" "luokat_next.txt" "Ryh._"