-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrefseq_protein.build
38 lines (27 loc) · 1012 Bytes
/
refseq_protein.build
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Build dated BLAST and DIAMOND copies of the NCBI RefSeq protein database.
#
# Uses GROUP, PREFIX and THREADS, which are expected to come from
# databases.config (sourced below) or from the environment:
#   GROUP   - group that owns the created directories
#   PREFIX  - root directory of the database tree
#   THREADS - thread count passed to "diamond makedb"
#
# Output goes to ${PREFIX}/refseq/<YYYYMMDD>/{blast,diamond}; the symlink
# ${PREFIX}/refseq/current is re-pointed at the new dated directory only
# after every earlier step has succeeded.
#
# Needs rsync, tar, wget, blastdbcmd and diamond on PATH, as well as
# ${PREFIX}/ncbi_taxonomy/nodes.dmp and ${PREFIX}/ncbi_taxonomy/names.dmp.
. databases.config
set -e

# Stop right away when a required setting is missing. An empty PREFIX would
# make every path below resolve under "/", and an empty THREADS would make
# --threads swallow the next diamond option as its value.
: "${GROUP:?GROUP must be set (see databases.config)}"
: "${PREFIX:?PREFIX must be set (see databases.config)}"
: "${THREADS:?THREADS must be set (see databases.config)}"

DATE=$(date +%Y%m%d)

# Create each level separately so that every directory gets the requested
# group and mode.
install -g "${GROUP}" --mode=755 -d "${PREFIX}/refseq"
install -g "${GROUP}" --mode=755 -d "${PREFIX}/refseq/${DATE}"
install -g "${GROUP}" --mode=755 -d "${PREFIX}/refseq/${DATE}/blast"
install -g "${GROUP}" --mode=755 -d "${PREFIX}/refseq/${DATE}/diamond"

cd "${PREFIX}/refseq/${DATE}"

# Fetch the pre-formatted BLAST archives. The glob is expanded by the rsync
# server, so it is not quoted against the local shell beyond being one word.
rsync -v --no-motd 'rsync://ftp.ncbi.nlm.nih.gov/blast/db/refseq_protein*gz' .

# Unpack every archive into blast/, then drop the archives.
cd blast
for f in ../*.gz; do
  tar xzf "${f}"
done
rm ../*gz
cd ..

# needed to encode taxonomy information
wget -q ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.FULL.gz

# Dump every entry of the BLAST database to a temporary file that
# "diamond makedb" reads through --in.
blastdbcmd -db blast/refseq_protein -entry all > refseq_protein.faa
diamond makedb --db diamond/refseq_protein --in refseq_protein.faa \
  --threads "${THREADS}" \
  --taxonnodes "${PREFIX}/ncbi_taxonomy/nodes.dmp" \
  --taxonnames "${PREFIX}/ncbi_taxonomy/names.dmp" \
  --taxonmap prot.accession2taxid.FULL.gz

# The intermediate inputs are only needed while building the DIAMOND database.
rm refseq_protein.faa prot.accession2taxid.FULL.gz

# Publish the new build: point refseq/current at today's directory.
cd ..
rm -f current
ln -s "${DATE}" current