Mercurial > hg > lukkari
annotate update.sh @ 46:9b5fa0f3812b
Improvements in the update logic.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sat, 13 Oct 2012 11:58:26 +0300 |
parents | 76900cdfc066 |
children | ceca06576918 |
rev | line source |
---|---|
#!/bin/sh
# Filename suffix of the per-class timetable pages. It is matched in the
# index page links and appended to each class name when building fetch URLs.
URLSUFFIX=".htm"
# parse URLPREFIX CLASSFILE PATPREFIX
#
# Refresh the cached timetable pages and their parsed data files.
#
# Arguments:
#   $1 - URL of the index page; also the base URL of the per-class pages
#   $2 - file that receives the list of class names, one per line
#        (the raw index page is saved next to it as "$2.tmp")
#   $3 - pattern that precedes the class name in the index page links;
#        it is interpolated into a Perl regex and into the fetch URLs
# Globals read:
#   CACHEDIR, OLDCACHEDIR - cache / backup directories (with trailing slash)
#   URLSUFFIX             - suffix of the per-class page file names
# Globals written:
#   URLPREFIX, CLASSFILE, LISTFILE, PATPREFIX, and the per-class loop
#   variables (i, INFILE, ONFILE, DATAFILE, do_parse)
# Outputs:
#   progress messages on stdout
parse()
{
    URLPREFIX="$1"
    CLASSFILE="$2"
    LISTFILE="$2.tmp"
    PATPREFIX="$3"

    # Create cache directories, if they do not exist
    mkdir -p "${OLDCACHEDIR}" "${CACHEDIR}"

    # Refresh the class list. If the index cannot be fetched, a class file
    # left over from an earlier run is used as-is below.
    if wget -q -O "$LISTFILE" "$URLPREFIX"; then
        perl -ne "if (/<a href=\"${PATPREFIX}([A-Z]{3}\d\S+)${URLSUFFIX}\">/) { print \"\$1\n\"; }" < "$LISTFILE" > "$CLASSFILE"
        printf '* Fetched classfile %s: ' "$CLASSFILE"
        wc -l < "$CLASSFILE"
    fi

    if test -e "$CLASSFILE"; then
        while IFS= read -r i; do
            do_parse=no
            INFILE="${CACHEDIR}${i}.html"
            ONFILE="${OLDCACHEDIR}${i}.html"
            DATAFILE="${i}.data"

            # wget -O creates (or truncates) the output file before the
            # transfer starts, so the mere existence of the .new file says
            # nothing. Require a successful exit status and a non-empty file.
            if wget -q -O "${INFILE}.new" "${URLPREFIX}${PATPREFIX}${i}${URLSUFFIX}" &&
               test -s "${INFILE}.new"; then
                # New data fetched, does old file exist?
                if test -e "$INFILE"; then
                    # Yes, do a diff (the .diff file is kept when changes exist)
                    if ! diff -u "$INFILE" "$INFILE.new" > "$INFILE.diff"; then
                        # There were differences: back up the old page,
                        # install the new one and request a parse
                        do_parse=yes
                        mv "$INFILE" "$ONFILE" && \
                        mv "$INFILE.new" "$INFILE"
                    else
                        # No changes, apparently .. remove the new one
                        rm -f "$INFILE.new" "$INFILE.diff"
                    fi
                else
                    # No old file, parse new data
                    mv "$INFILE.new" "$INFILE"
                    do_parse=yes
                fi
            else
                # Download failed: drop the empty/partial file so it can
                # never replace a good cached page.
                rm -f "${INFILE}.new"
            fi

            # Whatever the fetch outcome, a missing data file means the
            # cached page (if there is one) still has to be parsed.
            if test ! -e "${CACHEDIR}${DATAFILE}"; then
                do_parse=yes
            fi

            # Parse only when requested and when there is a page to parse
            if test "$do_parse" = "yes" && test -e "$INFILE"; then
                # Keep the previous data file as a backup
                if test -e "${CACHEDIR}${DATAFILE}"; then
                    mv "${CACHEDIR}${DATAFILE}" "${OLDCACHEDIR}${DATAFILE}"
                fi
                echo "Parsing $i"
                perl parsedata.pl -php "$INFILE" -o "${CACHEDIR}${DATAFILE}"
            fi
        done < "$CLASSFILE"
    fi
}
# Current period: pages are cached in cache/, previous versions in cache-old/
CACHEDIR="cache/"
OLDCACHEDIR="cache-old/"
parse "http://www.oamk.fi/tyojarjestykset/otek/luokat/" "luokat.txt" "OR_"
#parse "http://www.oamk.fi/~heikkim/riihi2/Oppilaat/" "luokat.txt" "Ryh._"

# Next/upcoming period: same procedure with separate cache directories
# and a separate class list file
CACHEDIR="cache-next/"
OLDCACHEDIR="cache-next-old/"
parse "http://www.oamk.fi/~heikkim/riihi2/Oppilaat/" "luokat_next.txt" "Ryh._"