-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathsource_refs.sh
executable file
·130 lines (106 loc) · 3.25 KB
/
source_refs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#! /bin/bash -e
# ===================================================================
# LIONS analysis pipeline - commandline arguments
# ===================================================================
#
# Downloads known references (currently hg19 or hg38) and their associated
# resources from the appropriate online repositories into directories
# containing the required sub-directory structure.
#
# Details can be found in README
#
# Start-up banner: two blank lines, the three-line title box, the ASCII-art
# figure, and two trailing blank lines. Each array element is written out
# verbatim as one line; single quotes keep the artwork's backslashes literal.
banner_lines=(
  ''
  ''
  '==============================================================='
  '============== L I O N S Resource Download ===================='
  '==============================================================='
  ' _ _'
  '_/ \|/ \_'
  '/\\/ \//\'
  '\|/<\ />\|/ *RAWR*'
  '/\ _ /\ /'
  '\|/\ Y /\|/'
  '\/|v-v|\/'
  '\/\_/\/'
  ''
  ''
)
printf '%s\n' "${banner_lines[@]}"
# Exactly one positional argument (the reference name) is required.
# Any other argument count prints the usage text and stops with status 1.
case $# in
  1) ;;
  *)
    cat << EOF
incorrect number of parameters.
usage:
source_refs.sh <reference>
where <reference> is either hg38 or hg19
EOF
    exit 1
    ;;
esac
# Select the download tool and record the requested reference.
#
# Sets: wgt  path to wget, or empty when it is not installed
#       crl  path to curl, or empty when it is not installed
#       ref  the reference name passed as the first argument
#       cmd  downloader command line (program plus options); wget is
#            preferred, curl is the fallback
# Exits with status 1 when neither tool can be found.

# `command -v` fails when the tool is absent; `|| true` keeps that from
# aborting the script under `-e` before the fallback can be tried.
wgt=$(command -v wget || true)
crl=$(command -v curl || true)
ref=$1
cmd=""

# Test the captured paths, not bare words: a literal non-empty word is
# always true inside [ ], which would make wget look present even when it
# is not and leave the curl and error branches unreachable.
if [[ -n "$wgt" ]]; then
  cmd="$wgt"
elif [[ -n "$crl" ]]; then
  # -O saves under the remote file name, -L follows redirects, and -f makes
  # an HTTP error a non-zero exit instead of saving the error page.
  cmd="$crl -fL -O"
else
  cat << EOF
unable to find wget or curl
EOF
  exit 1
fi
# -------------------------------------------------------------------
# Download and unpack the resources for the requested reference.
#
# Reads:   $ref  reference name given on the command line (hg38 | hg19)
#          $cmd  downloader command line (program plus options)
# Creates: ./<ref>/{genome,annotation,repeat} under the current directory
# Exit:    non-zero if a step fails or the reference is not recognised
# -------------------------------------------------------------------

# The `-e` on the shebang line is lost when the script is started as
# `bash source_refs.sh`; enable it here so a failed cd/download/unpack stops
# the run instead of falling through to the "successfully downloaded" line.
set -e

# Fetch and unpack the hg38 genome, RefGene and Repeat Masker resources.
download_hg38() {
  # genomeName = hg38 ========================
  mkdir -p hg38/{genome,annotation,repeat}

  # Genome
  cd hg38/genome
  echo "Downloading Reference: hg38.fa.gz"
  # $cmd is left unquoted on purpose: it may hold a program plus options.
  # shellcheck disable=SC2086
  $cmd http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz
  gunzip -v hg38.fa.gz

  # Gene Annotation (RefSeq)
  cd ../annotation
  echo "Downloading RefGene resource: refseq_hg38.ucsc.gz"
  # shellcheck disable=SC2086
  $cmd https://s3-us-west-2.amazonaws.com/lionproject/resources/hg38/refseq_hg38.ucsc.gz
  gunzip -v refseq_hg38.ucsc.gz

  # Repeat Masker
  cd ../repeat
  echo "Downloading Repeat Masker resource: rm_hg38.ucsc.gz"
  # shellcheck disable=SC2086
  $cmd https://s3-us-west-2.amazonaws.com/lionproject/resources/hg38/rm_hg38.ucsc.gz
  gunzip -v rm_hg38.ucsc.gz

  echo "hg38 successfully downloaded"
}

# Fetch the hg19 genome (2bit, converted to FASTA with twoBitToFa) and the
# RefGene and Repeat Masker resources.
download_hg19() {
  # genomeName = hg19 ========================
  mkdir -p hg19/{genome,annotation,repeat}

  # Genome
  cd hg19/genome
  echo "Downloading Reference: hg19.2bit"
  # shellcheck disable=SC2086
  $cmd http://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.2bit
  # shellcheck disable=SC2086
  $cmd http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/twoBitToFa
  chmod 755 twoBitToFa
  # Run the converter inside the `if` so that a failure reaches the manual
  # hint below rather than aborting the script under `set -e`.
  if ./twoBitToFa hg19.2bit hg19.fa && [[ -e hg19.fa ]]; then
    rm -v hg19.2bit twoBitToFa
  else
    # printf, because Bash's echo prints "\n" literally.
    printf "\nPlease run 'twoBitToFa hg19.2bit hg19.fa' manually to convert hg19 for LIONS\n\n"
  fi

  # Gene Annotation (RefSeq)
  cd ../annotation
  echo "Downloading RefGene resource: refseq_hg19.ucsc"
  # shellcheck disable=SC2086
  $cmd https://s3-us-west-2.amazonaws.com/lionproject/resources/hg19/refSeq_hg19.ucsc.zip
  # Separate statements: in `unzip ... && rm ...` a failing unzip would be
  # exempt from `set -e` and the script would carry on.
  unzip refSeq_hg19.ucsc.zip
  rm -v refSeq_hg19.ucsc.zip

  # Repeat Masker
  cd ../repeat
  echo "Downloading Repeat Masker resource: rm_hg19.ucsc"
  # shellcheck disable=SC2086
  $cmd https://s3-us-west-2.amazonaws.com/lionproject/resources/hg19/rm_hg19.ucsc.zip
  unzip rm_hg19.ucsc.zip
  rm -v rm_hg19.ucsc.zip

  echo "hg19 successfully downloaded"
}

# Quoting "$ref" keeps an empty or multi-word value from breaking the match.
case "$ref" in
  hg38) download_hg38 ;;
  hg19) download_hg19 ;;
  *)
    cat << EOF
$1 not a known reference.
Please note, this script currently only recognises human references hg19 and hg38.
EOF
    # An unrecognised reference is an error; report it in the exit status.
    exit 1
    ;;
esac