-
Notifications
You must be signed in to change notification settings - Fork 0
/
hydrospider-cronjob.sh
executable file
·79 lines (61 loc) · 1.62 KB
/
hydrospider-cronjob.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/bin/bash
# Executes a scrapy spider available in a virtualenv
#
# sleeplimit
# upper bound, in seconds, of the random delay before the spider runs; leave empty to skip the delay
#
# virtualenv
# absolute path to the spider's virtualenv
#
# scrapyroot
# absolute path to the root of a scrapy project
#
# spiderfile
# path to the spider file
#
# cronjoblog
# empty for no logging or path to log file
#set -x
# --- configuration -----------------------------------------------------
# Random sleep limit in seconds; an empty string disables the sleep.
sleeplimit="18000"
# Absolute path to the spider's virtualenv.
virtualenv="/home/self/dev/info-debit-virtualenv"
# Root of the scrapy project (located inside the virtualenv directory).
scrapyroot="${virtualenv}/infodebit-scrapy"
# Spider file handed to `scrapy runspider`.
spiderfile="${scrapyroot}/infodebit/spiders/hydrospider.py"
# Log file path; an empty string disables logging.
cronjoblog="${virtualenv}/cronjob.log"
#######################################
# Appends a timestamped message to the cron job log.
# Globals:   cronjoblog (read) - path of the log file; empty disables logging
# Arguments: the message words; they are written separated by single spaces
# Outputs:   one "[YYYY-MM-DD HH:MM:SS] message" line appended to $cronjoblog
#######################################
function log {
  if [[ -n "$cronjoblog" ]]; then
    # Both the timestamp and the redirect target are quoted: an unquoted
    # target containing whitespace makes bash fail with "ambiguous redirect".
    echo "$(date +"[%Y-%m-%d %T]")" "$@" >> "$cronjoblog"
  fi
}
#######################################
# Logs an error message through log() and terminates the script.
# Arguments: the message words describing the failure
# Returns:   never returns; exits the shell with status 1
#######################################
function error {
  # "$*" joins all arguments into one word. "$@" embedded in a longer
  # string splits the message into several arguments (ShellCheck SC2145).
  log "ERROR: $*"
  exit 1
}
#######################################
# Sleeps for a random number of seconds in the range [0, sleeplimit).
# Globals:   sleeplimit (read) - upper bound in seconds; empty or 0 skips
#            the sleep, a non-numeric value is reported and skipped
# Outputs:   a log() line announcing the chosen delay
# Returns:   0 when the sleep is skipped, otherwise the status of sleep
#######################################
function randomsleep {
  # Empty limit: sleeping is disabled.
  if [[ -z "$sleeplimit" ]]; then
    return 0
  fi

  # A non-numeric limit would otherwise evaluate to 0 in arithmetic context
  # and abort the modulo below with a division-by-zero error.
  if [[ ! "$sleeplimit" =~ ^[0-9]+$ ]]; then
    log "WARNING: ignoring non-numeric sleeplimit ($sleeplimit)"
    return 0
  fi

  # Force base 10 so a value with leading zeros is not parsed as octal.
  local limit=$((10#$sleeplimit))
  if (( limit == 0 )); then
    return 0
  fi

  # $RANDOM alone only yields 0..32767. Combining two draws gives 30 random
  # bits, so limits above 32767 are reachable and the modulo bias for limits
  # such as 18000 becomes negligible.
  local random_sleep=$(( ((RANDOM << 15) | RANDOM) % limit ))
  local minutes=$(( random_sleep / 60 ))

  log "sleeping for $random_sleep seconds ($minutes minutes)..."
  sleep "$random_sleep"
}
#######################################
# Runs the configured spider with `scrapy runspider`.
# Globals:   spiderfile (read) - spider file passed to scrapy
#            cronjoblog (read) - log file receiving scrapy's stdout and
#                                stderr; empty leaves both streams untouched
# Returns:   the exit status of scrapy
#######################################
function runspider {
  log "executing: scrapy runspider $spiderfile"
  if [[ -n "$cronjoblog" ]]; then
    # Quoted target: a path with whitespace must not split the redirect.
    scrapy runspider "$spiderfile" >> "$cronjoblog" 2>&1
  else
    # With logging disabled there is no file to append to. Redirecting to
    # an empty expansion is an "ambiguous redirect" error and would stop
    # scrapy from running at all, so run it without redirection.
    scrapy runspider "$spiderfile"
  fi
}
# --- main sequence -----------------------------------------------------
# Validate the configured paths, activate the virtualenv, change into the
# scrapy project root, wait a random delay and finally run the spider.
# Any failed precondition is logged through error(), which exits with 1.

# ${0##*/} strips the directory part of the script path without forking
# basename and without word-splitting a path that contains spaces.
log "${0##*/} starting"

if [[ ! -d "$virtualenv" ]]; then
  error "could not find virtualenv ($virtualenv)"
fi

activate="$virtualenv/bin/activate"
if [[ ! -f "$activate" ]]; then
  error "could not find bin/activate in $virtualenv"
fi

if [[ ! -d "$scrapyroot" ]]; then
  error "could not find the project root ($scrapyroot)"
fi

log "activating virtualenv..."
# Stop here if activation fails; otherwise scrapy would be looked up in
# whatever environment the script was started from.
# shellcheck disable=SC1090 -- path is only known at run time
source "$activate" || error "could not activate virtualenv ($activate)"

log "changing CWD to $scrapyroot"
# An unchecked cd failure would run the spider from the wrong directory.
cd "$scrapyroot" || error "could not change directory to $scrapyroot"

# Checked after the cd so that a relative spiderfile resolves against the
# project root; failing now avoids sleeping only to fail afterwards.
if [[ ! -f "$spiderfile" ]]; then
  error "could not find the spider file ($spiderfile)"
fi

randomsleep
runspider