Mercurial > hg > lukkari
annotate update.sh @ 75:3d9e42477367
More improvements in the parsing and XML output.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Wed, 24 Oct 2012 07:33:28 +0300 |
parents | 2cfb0a7eac9b |
children | d1b65d9903ab |
rev | line source |
---|---|
6
7fca87c41e17
Added data fetching and updating shellscript.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
#!/bin/sh
# Fetches class timetable pages into a local cache and re-runs the parser
# (parsedata.pl) on pages that are new or have changed.

# Suffix appended to a class name when matching links on the index page
# and when building the URL of the class's timetable page.
URLSUFFIX=".htm"
7fca87c41e17
Added data fetching and updating shellscript.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
3 |
24
1b8260151e99
Get updates / data from upcoming work-in-progress timetables for next period also.
Matti Hamalainen <ccr@tnsp.org>
parents:
22
diff
changeset
|
# parse URLPREFIX CLASSFILE PATPREFIX
#
# Refresh the cached timetable pages for one index page and re-run the
# parser on every page that is new or has changed.
#
# Arguments:
#   $1 (URLPREFIX) - URL of the index page; also the base URL of each page.
#   $2 (CLASSFILE) - file that receives the extracted class names, one per
#                    line. "$2.tmp" is used as scratch space for the index.
#   $3 (PATPREFIX) - prefix of the link names on the index page. It is
#                    inserted into a Perl regex AND, literally, into the
#                    URL of every page that is fetched.
# Globals read:
#   CACHEDIR  - cache directory prefix, including the trailing slash.
#   URLSUFFIX - suffix of the link names / page URLs.
# Globals written:
#   OLDCACHEDIR, URLPREFIX, CLASSFILE, LISTFILE, PATPREFIX and the per-class
#   loop variables (i, INFILE, ONFILE, DATAFILE, do_parse).
# Outputs:
#   Progress messages on stdout, fetch failures on stderr.
#
# NOTE(review): because PATPREFIX is used both as a regex fragment and as
# literal URL text, a prefix containing regex metacharacters (such as
# "Ryh._") can match links whose real name differs from the URL that is
# requested afterwards -- confirm this is intended.
parse()
{
  URLPREFIX="$1"
  CLASSFILE="$2"
  LISTFILE="$2.tmp"
  PATPREFIX="$3"

  # Create cache directories, if they do not exist
  OLDCACHEDIR="${CACHEDIR}old/"
  [ -d "${OLDCACHEDIR}" ] || mkdir -p "${OLDCACHEDIR}"
  [ -d "${CACHEDIR}" ] || mkdir -p "${CACHEDIR}"

  # Refresh the list of class names. If the index cannot be fetched, any
  # class file left over from an earlier run is reused below.
  if wget -q -O "$LISTFILE" "$URLPREFIX"; then
    perl -ne "if (/<a href=\"${PATPREFIX}([A-Z]{3}\d\S+)${URLSUFFIX}\">/) { print \"\$1\n\"; }" < "$LISTFILE" > "$CLASSFILE"
    printf '* Fetched classfile %s: ' "$CLASSFILE"
    wc -l < "$CLASSFILE"
  fi
  # The raw index is only scratch data (wget -O leaves it behind even on
  # failure), so never keep it around.
  rm -f "$LISTFILE"

  if [ -e "$CLASSFILE" ]; then
    while read -r i; do
      # Ignore blank lines in the class file.
      [ -n "$i" ] || continue

      do_parse=no
      INFILE="${CACHEDIR}${i}.html"
      ONFILE="${OLDCACHEDIR}${i}.html"
      DATAFILE="${i}.data"

      # wget -O creates/truncates its output file even when the download
      # fails, so the mere existence of the ".new" file proves nothing.
      # Only accept it when wget succeeded and the file is non-empty;
      # otherwise a failed fetch would replace the good cached page with
      # an empty one.
      if wget -q -O "${INFILE}.new" "${URLPREFIX}${PATPREFIX}${i}${URLSUFFIX}" \
         && [ -s "${INFILE}.new" ]; then
        # New data fetched, does old file exist?
        if [ -e "$INFILE" ]; then
          # Yes, do a diff
          if ! diff -u "$INFILE" "$INFILE.new" > "$INFILE.diff"; then
            # There were differences: back up the old page, install the
            # new one and request a parse. The .diff file is kept.
            do_parse=yes
            mv "$INFILE" "$ONFILE" &&
              mv "$INFILE.new" "$INFILE"
          else
            # No changes, apparently .. remove the new one
            rm -f "$INFILE.new" "$INFILE.diff"
          fi
        else
          # No old file, parse new data
          mv "$INFILE.new" "$INFILE"
          do_parse=yes
        fi
      else
        printf 'Failed to fetch %s, keeping cached copy\n' "$i" >&2
        rm -f "${INFILE}.new"
      fi

      # Whatever happened above: if there is no parsed data file yet, try
      # to produce one from the cached page.
      if [ ! -e "${CACHEDIR}${DATAFILE}" ]; then
        do_parse=yes
      fi

      # Parse only when requested and when there is a page to parse.
      if [ "$do_parse" = "yes" ] && [ -e "$INFILE" ]; then
        # Keep the previous parse result as a backup.
        if [ -e "${CACHEDIR}${DATAFILE}" ]; then
          mv "${CACHEDIR}${DATAFILE}" "${OLDCACHEDIR}${DATAFILE}"
        fi
        echo "Parsing $i"
        perl parsedata.pl -php "$INFILE" -o "${CACHEDIR}${DATAFILE}"
        perl parsedata.pl -xml "$INFILE" -o "${CACHEDIR}${i}.xml"
      fi
    done < "$CLASSFILE"
  fi
}
6
7fca87c41e17
Added data fetching and updating shellscript.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
72 |
35
4d9354abda73
Update fetching URLs and parameters to match the latest changes on OAMK's web.
Matti Hamalainen <ccr@tnsp.org>
parents:
34
diff
changeset
|
73 CACHEDIR="cache/" |
65 | 74 parse "http://www.oamk.fi/tyojarjestykset/otek/luokat/" "classes.txt" "OR_" |
75 #parse "http://www.oamk.fi/~heikkim/riihi2/Oppilaat/" "classes.txt" "Ryh._" | |
29
ac51fc10414f
Add support for URL prefix pattern in updates.
Matti Hamalainen <ccr@tnsp.org>
parents:
24
diff
changeset
|
76 |
31
dbe7ff545293
Add support for fetching and showing data for next/upcoming period.
Matti Hamalainen <ccr@tnsp.org>
parents:
29
diff
changeset
|
77 CACHEDIR="cache-next/" |
65 | 78 parse "http://www.oamk.fi/~heikkim/riihi2/Oppilaat/" "classes_next.txt" "Ryh._" |