Skip to content

Commit

Permalink
Updated uniprot
Browse files Browse the repository at this point in the history
Updated interpro
  • Loading branch information
Data Mover committed Sep 9, 2024
1 parent da184c9 commit 34d46c8
Show file tree
Hide file tree
Showing 19 changed files with 316 additions and 7 deletions.
Empty file modified .github/workflows/main.yml
100644 → 100755
Empty file.
Empty file modified .gitignore
100644 → 100755
Empty file.
Empty file modified LICENSE
100644 → 100755
Empty file.
Empty file modified README.md
100644 → 100755
Empty file.
58 changes: 58 additions & 0 deletions easyconfigs/b/BUSCO-db/BUSCO-db_extract.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash
# ----------------SLURM Parameters----------------
#SBATCH -p admin
#SBATCH -n 4
#SBATCH --mem=20g
#SBATCH -N 1
#SBATCH --mail-user=datamover@igb.illinois.edu
#SBATCH --mail-type=ALL
#SBATCH -J BUSCO-db_extract
#SBATCH -D /home/a-m/datamover/jobs
#SBATCH -o %x-%j.out
# ----------------Load Modules--------------------
module load pigz/2.4-IGB-gcc-8.2.0
# ----------------Commands------------------------
#
# Decompress every *.gz file under the BUSCO-db mirror directory for one
# version, then normalise directory/file permissions.
#
# Usage:   sbatch BUSCO-db_extract.sh VERSION
# Args:    $1 - version label; selects /private_stores/mirror/BUSCO-db/VERSION
# Exits:   1 if VERSION is missing or the mirror directory does not exist;
#          otherwise the exit status of the first failing gzip/pigz call,
#          or 0 on success.
#

DATABASE="BUSCO-db"

if [ -z "$1" ]; then
  echo "Please specify ${DATABASE} version number"
  exit 1
fi

VERSION=$1
MIRROR_DIR=/private_stores/mirror/${DATABASE}/${VERSION}

# Print a message prefixed with the current timestamp.
log() {
  echo "$(date "+%Y-%m-%d %k:%M:%S") $*"
}

if [ ! -d "${MIRROR_DIR}" ]; then
  log "Mirror directory not found: ${MIRROR_DIR}"
  exit 1
fi

log "Extracting tar.gz Files"
# Collect the file list up front (NUL-delimited so names with whitespace
# survive) because gzip -d removes/creates files while we iterate.
gz_files=()
while IFS= read -r -d '' f; do
  gz_files+=("$f")
done < <(find "${MIRROR_DIR}" -name '*.gz' -print0)

# NOTE(review): gzip -d only decompresses; a foo.tar.gz becomes foo.tar and is
# not unpacked. Confirm whether a tar extraction step is also wanted here.
for f in "${gz_files[@]}"; do
  gzip -d -- "$f"
  rc=$?
  if [ "$rc" -ne 0 ]; then
    log "Error extracting file: $f"
    # Exit with gzip's status; a bare "exit $?" here would report the
    # status of the log/echo command (0) and mark the job as successful.
    exit "$rc"
  fi
  log "Done extracting file: $f"
done

log "Extracting .gz Files with Pigz"
# Fall back to a single thread when run outside of a SLURM allocation.
pigz -p "${SLURM_NTASKS:-1}" -dr "${MIRROR_DIR}"
rc=$?
if [ "$rc" -ne 0 ]; then
  log "Extracting files Failed"
  exit "$rc"
fi
log "Extracting Files Complete"

log "Fix Permissions Start"
# "+" batches paths into as few chmod invocations as possible.
find "${MIRROR_DIR}" -type d -exec chmod 775 {} +
find "${MIRROR_DIR}" -type f -exec chmod 664 {} +
log "Fix Permissions Completed"

27 changes: 27 additions & 0 deletions easyconfigs/c/checkv-db/checkv-db-1.5.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# EasyBuild easyconfig for the CheckV reference database, version 1.5.
# No sources are declared in this file, so the database files are presumably
# placed in the install directory by other means (e.g. the companion
# download/extract scripts) - TODO confirm.
easyblock = 'Bundle'

name = 'checkv-db'
version = '1.5'

homepage = 'https://portal.nersc.gov/CheckV/'
description = """The main change in v1.5 was to remove putatively non-viral sequences to minimize high-confidence matches to the CheckV database for non-viral sequences."""

# Data-only module: built with the system toolchain and no dependencies.
toolchain = SYSTEM


dependencies = []

# Paths (relative to the install directory) that must exist for the
# installation to pass the sanity check.
sanity_check_paths = {
'files': [],
'dirs': ['checkv-db-v1.5'],
}

# Environment variables extended by the generated module; the empty string
# is expected to resolve to the install directory itself - verify against
# the EasyBuild version in use.
modextrapaths = {
'BIODB':''
}

# Keep whatever is already in the install directory instead of wiping it.
keeppreviousinstall = True
moduleclass = 'data'

# Message shown to the user when the module is loaded.
modloadmsg = "%(name)s/%(version)s database is located at %(installdir)s\n"

42 changes: 42 additions & 0 deletions easyconfigs/c/checkv-db/checkv-db_download.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
# ----------------SLURM Parameters----------------
#SBATCH -p admin
#SBATCH -n 1
#SBATCH --mem=20g
#SBATCH -N 1
#SBATCH --mail-user=datamover@igb.illinois.edu
#SBATCH --mail-type=ALL
#SBATCH -J checkv-db_download
#SBATCH -D /home/a-m/datamover/jobs
#SBATCH -o %x-%j.out
# ----------------Load Modules--------------------
# ----------------Commands------------------------
#
# Download the CheckV database tarball for one version into the mirror.
#
# Usage:   sbatch checkv-db_download.sh VERSION
# Args:    $1 - CheckV database version (e.g. 1.5); used both in the download
#               URL (checkv-db-vVERSION.tar.gz) and in the mirror path.
# Exits:   1 if VERSION is missing or the mirror directory cannot be created;
#          wget's exit status if the download fails; 0 on success.
#

DATABASE="checkv-db"

if [ -z "$1" ]; then
  echo "Please specify ${DATABASE} version number"
  exit 1
fi

VERSION=$1
MIRROR_DIR=/private_stores/mirror/${DATABASE}/${VERSION}

# Print a message prefixed with the current timestamp.
log() {
  echo "$(date "+%Y-%m-%d %k:%M:%S") $*"
}

log "Downloading Files"
if ! mkdir -p "${MIRROR_DIR}"; then
  log "Could not create directory: ${MIRROR_DIR}"
  exit 1
fi

wget "https://portal.nersc.gov/CheckV/checkv-db-v${VERSION}.tar.gz" -P "${MIRROR_DIR}/"
rc=$?
if [ "$rc" -ne 0 ]; then
  log "Downloading Files Failed"
  # Exit with wget's status; a bare "exit $?" here would report the status
  # of the log/echo command (0) and mark the job as successful.
  exit "$rc"
fi
log "Downloading Files Complete"



50 changes: 50 additions & 0 deletions easyconfigs/c/checkv-db/checkv-db_extract.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash
# ----------------SLURM Parameters----------------
#SBATCH -p admin
#SBATCH -n 4
#SBATCH --mem=20g
#SBATCH -N 1
#SBATCH --mail-user=datamover@igb.illinois.edu
#SBATCH --mail-type=ALL
#SBATCH -J checkv-db_extract
#SBATCH -D /home/a-m/datamover/jobs
#SBATCH -o %x-%j.out
# ----------------Load Modules--------------------
module load pigz/2.4-IGB-gcc-8.2.0
# NOTE(review): this script calls "diamond" but no diamond module is loaded
# here; it must already be on PATH. Add the site's diamond module if needed.
# ----------------Commands------------------------
#
# Unpack every *.tar.gz under the checkv-db mirror directory for one version,
# build the DIAMOND index for checkv_reps.faa, then normalise permissions.
#
# Usage:   sbatch checkv-db_extract.sh VERSION
# Args:    $1 - CheckV database version; selects
#               /private_stores/mirror/checkv-db/VERSION
# Exits:   1 if VERSION is missing, the mirror directory does not exist, or
#          diamond is not available; otherwise the exit status of the first
#          failing tar/diamond call, or 0 on success.
#

DATABASE="checkv-db"

if [ -z "$1" ]; then
  echo "Please specify ${DATABASE} version number"
  exit 1
fi

VERSION=$1
MIRROR_DIR=/private_stores/mirror/${DATABASE}/${VERSION}
GENOME_DB_DIR=${MIRROR_DIR}/checkv-db-v${VERSION}/genome_db

# Print a message prefixed with the current timestamp.
log() {
  echo "$(date "+%Y-%m-%d %k:%M:%S") $*"
}

if [ ! -d "${MIRROR_DIR}" ]; then
  log "Mirror directory not found: ${MIRROR_DIR}"
  exit 1
fi

# Fail before the (slow) extraction rather than after it.
if ! command -v diamond >/dev/null; then
  log "diamond not found in PATH"
  exit 1
fi

log "Extracting tar.gz Files"
# Collect the archive list up front, NUL-delimited so that paths containing
# whitespace are handled and newly extracted files are not picked up.
archives=()
while IFS= read -r -d '' f; do
  archives+=("$f")
done < <(find "${MIRROR_DIR}" -name '*.tar.gz' -print0)

for f in "${archives[@]}"; do
  # Unpack each archive next to itself.
  tar -xvzf "$f" -C "$(dirname "$f")"
  rc=$?
  if [ "$rc" -ne 0 ]; then
    log "Error extracting file: $f"
    # Exit with tar's status; a bare "exit $?" here would report the status
    # of the log/echo command (0) and mark the job as successful.
    exit "$rc"
  fi
  log "Done extracting file: $f"
done

#Run diamond to create index
log "Creating DIAMOND index"
# Fall back to a single thread when run outside of a SLURM allocation.
diamond makedb -p "${SLURM_NTASKS:-1}" \
  --in "${GENOME_DB_DIR}/checkv_reps.faa" \
  --db "${GENOME_DB_DIR}/checkv_reps.dmnd"
rc=$?
if [ "$rc" -ne 0 ]; then
  log "Creating DIAMOND index Failed"
  exit "$rc"
fi
log "Creating DIAMOND index Complete"

log "Fix Permissions Start"
# "+" batches paths into as few chmod invocations as possible.
find "${MIRROR_DIR}" -type d -exec chmod 775 {} +
find "${MIRROR_DIR}" -type f -exec chmod 664 {} +
log "Fix Permissions Completed"

29 changes: 29 additions & 0 deletions easyconfigs/d/dorado-db/dorado-db-20240501.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# EasyBuild easyconfig for the dorado model database, snapshot 20240501.
# No sources are declared (empty source_urls/sources), so the model files are
# presumably placed in the install directory by other means (e.g. the
# companion download script) - TODO confirm.
easyblock = 'Bundle'

name = 'dorado-db'
version = '20240501'

homepage = 'https://github.com/nanoporetech/dorado'
description = """Databases for dorado"""

# Data-only module: built with the system toolchain and no dependencies.
toolchain = SYSTEM

source_urls = ['']
sources = []

dependencies = []

# Paths (relative to the install directory) that must exist for the
# installation to pass the sanity check; only one model directory is probed.
sanity_check_paths = {
'files': [],
'dirs': ['dna_r9.4.1_e8_fast@v3.4'],
}

# Environment variables extended by the generated module; the empty string
# is expected to resolve to the install directory itself - verify against
# the EasyBuild version in use.
modextrapaths = {
'BIODB':''
}

# Keep whatever is already in the install directory instead of wiping it.
keeppreviousinstall = True
moduleclass = 'data'

# Message shown to the user when the module is loaded.
modloadmsg = "%(name)s/%(version)s database is located at %(installdir)s\n"

35 changes: 35 additions & 0 deletions easyconfigs/d/dorado-db/dorado-db_download.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# ----------------SLURM Parameters----------------
#SBATCH -p admin
#SBATCH -n 1
#SBATCH --mem=20g
#SBATCH -N 1
#SBATCH --mail-user=datamover@igb.illinois.edu
#SBATCH --mail-type=ALL
#SBATCH -J dorado-db_download
#SBATCH -D /home/a-m/datamover/jobs
#SBATCH -o %x-%j.out
# ----------------Load Modules--------------------
module load dorado
# ----------------Commands------------------------
#
# Run "dorado download" inside the dorado-db mirror directory for one version.
#
# Usage:   sbatch dorado-db_download.sh VERSION
# Args:    $1 - version label; only selects the target directory
#               /private_stores/mirror/dorado-db/VERSION ("dorado download"
#               itself is invoked without arguments).
# Exits:   1 if VERSION is missing or the mirror directory cannot be
#          created/entered; dorado's exit status if the download fails;
#          0 on success.
#

DATABASE="dorado-db"

if [ -z "$1" ]; then
  echo "Please specify ${DATABASE} version number"
  exit 1
fi

VERSION=$1
MIRROR_DIR=/private_stores/mirror/${DATABASE}/${VERSION}

# Print a message prefixed with the current timestamp.
log() {
  echo "$(date "+%Y-%m-%d %k:%M:%S") $*"
}

log "Downloading Files"
if ! mkdir -p "${MIRROR_DIR}"; then
  log "Could not create directory: ${MIRROR_DIR}"
  exit 1
fi

# Without this check a failed cd would leave us in the SLURM working
# directory and the models would be downloaded there instead.
if ! cd "${MIRROR_DIR}"; then
  log "Could not change to directory: ${MIRROR_DIR}"
  exit 1
fi

dorado download
rc=$?
if [ "$rc" -ne 0 ]; then
  log "Downloading Files Failed"
  exit "$rc"
fi
log "Downloading Files Complete"

30 changes: 30 additions & 0 deletions easyconfigs/f/funannotate-db/funannotate-db-20240515.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# EasyBuild easyconfig for the funannotate database, snapshot 20240515.
# No sources are declared (empty source_urls/sources), so the database files
# are presumably placed in the install directory by other means - TODO confirm.
easyblock = 'Bundle'

name = 'funannotate-db'
version = '20240515'

homepage = 'https://github.com/nextgenusfs/funannotate'
description = """funannotate is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes). Installation, usage, and more information can be found at http://funannotate.readthedocs.io"""

# Data-only module: built with the system toolchain and no dependencies.
toolchain = SYSTEM

source_urls = ['']
sources = []

dependencies = []

# Paths (relative to the install directory) that must exist for the
# installation to pass the sanity check.
sanity_check_paths = {
'files': ['uniprot.dmnd'],
'dirs': [],
}

# Environment variables extended by the generated module; the empty string
# is expected to resolve to the install directory itself - verify against
# the EasyBuild version in use. Both variables point at the same location.
modextrapaths = {
'BIODB':'',
'FUNANNOTATE_DB': ''
}

# Keep whatever is already in the install directory instead of wiping it.
keeppreviousinstall = True
moduleclass = 'data'

# Message shown to the user when the module is loaded.
modloadmsg = "%(name)s/%(version)s database is located at %(installdir)s\n"

4 changes: 2 additions & 2 deletions easyconfigs/i/interpro/interpro_download.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ----------------Load Modules--------------------
module load globus-cli/3.18.0-IGB-gcc-8.2.0-Python-3.7.2
module load globus-cli/3.30.1-IGB-gcc-8.2.0-Python-3.10.1

DATABASE="interpro"

Expand All @@ -14,6 +14,6 @@ MIRROR_DIR=/private_stores/mirror/${DATABASE}/${VERSION}

echo "`date "+%Y-%m-%d %k:%M:%S"` Downloading Files"
mkdir -p ${MIRROR_DIR}
globus transfer -r --exclude "reference_proteomes" --exclude "taxonomic_divisions" "47772002-3e5b-4fd3-b97c-18cee38d6df2:/pub/databases/interpro/releases/$VERSION/" "1ccc563b-0542-44e5-a13c-fc4b00281b72:${MIRROR_DIR}/"
globus transfer -r --exclude "reference_proteomes" --exclude "taxonomic_divisions" "47772002-3e5b-4fd3-b97c-18cee38d6df2:/pub/databases/interpro/releases/$VERSION/" "4a467fda-f559-4fc3-b54a-e2842f439e06:${MIRROR_DIR}/"


Empty file modified easyconfigs/p/pgap-db/pgap-db-2021-07-01.build5508
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion easyconfigs/r/R/R_mirror_update.pl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
my $cran_dir="/private_stores/mirror/R/cran";
my $cran_exclude="--exclude='bin'";

my @bioc_versions = ('3.18');
my @bioc_versions = ('3.19');

foreach my $version (@bioc_versions) {

Expand Down
2 changes: 1 addition & 1 deletion easyconfigs/templates/globus.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ VERSION=$1
DATABASE=""
MIRROR_DIR=/private_stores/mirror/${DATABASE}/${VERSION}
FASTA_DIR=${MIRROR_DIR}/db
BIOTRANSFER_UUID="1ccc563b-0542-44e5-a13c-fc4b00281b72"
BIOTRANSFER_UUID="4a467fda-f559-4fc3-b54a-e2842f439e06"
SOURCE_UUID=""

echo "Downloading Files: `date "+%Y-%m-%d %k:%M:%S"`"
Expand Down
Binary file added easyconfigs/u/uniprot/.uniprot_download.sh.swp
Binary file not shown.
2 changes: 1 addition & 1 deletion easyconfigs/u/uniprot/uniprot-2023_05.eb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
easyblock = 'Bundle'

name = 'uniprot'
version = '2023_04'
version = '2023_05'


homepage = 'https://www.uniprot.org/'
Expand Down
38 changes: 38 additions & 0 deletions easyconfigs/u/uniprot/uniprot_2024_04.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# EasyBuild easyconfig for the UniProt database, release 2024_04.
# No sources are declared in this file, so the database files and indexes are
# presumably placed in the install directory by other means (e.g. the
# companion download script) - TODO confirm.
easyblock = 'Bundle'

name = 'uniprot'
version = '2024_04'


homepage = 'https://www.uniprot.org/'
description = """The mission of UniProt is to provide the scientific community with a comprehensive, high-quality and freely accessible resource of protein sequence and functional information. """

# Data-only module: built with the system toolchain and no dependencies.
toolchain = SYSTEM

dependencies = []


# Paths (relative to the install directory) that must exist for the
# installation to pass the sanity check: the main FASTA/id-mapping files plus
# the raw database and index directories.
sanity_check_paths = {
'files': ['db/knowledgebase/complete/uniprot_sprot.fasta',
'db/knowledgebase/complete/uniprot_trembl.fasta',
'db/knowledgebase/idmapping/idmapping.dat',
'db/uniref/uniref100/uniref100.fasta',
'db/uniref/uniref90/uniref90.fasta',
'db/uniref/uniref50/uniref50.fasta'
],
'dirs': ['db','blastdb_v4','blastdb_v5','diamond']
}

# Environment variables extended by the generated module, each pointing at a
# sub-directory of the install directory; BLASTDB receives both the v4 and v5
# index directories.
modextrapaths = {'BIODB':'db',
'BLASTDB': ['blastdb_v4','blastdb_v5'],
'DIAMONDDB': 'diamond',
}

# Keep whatever is already in the install directory instead of wiping it.
keeppreviousinstall = True

moduleclass = 'data'

# Message shown to the user when the module is loaded.
modloadmsg = "%(name)s/%(version)s database and indexes are located at %(installdir)s/\n"

##Helpful documentation
#https://embl.service-now.com/kb?id=kb_article_view&sysparm_article=KB0011060
4 changes: 2 additions & 2 deletions easyconfigs/u/uniprot/uniprot_download.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ----------------Load Modules--------------------
module load globus-cli/3.18.0-IGB-gcc-8.2.0-Python-3.7.2
module load globus-cli/3.30.1-IGB-gcc-8.2.0-Python-3.10.1

# ----------------Commands------------------------

Expand All @@ -15,7 +15,7 @@ FASTA_DIR=${MIRROR_DIR}/db

echo "Downloading Files: `date "+%Y-%m-%d %k:%M:%S"`"
mkdir -p ${FASTA_DIR}
globus transfer -r --exclude "reference_proteomes" --exclude "taxonomic_divisions" "47772002-3e5b-4fd3-b97c-18cee38d6df2:/pub/databases/uniprot/current_release/" "1ccc563b-0542-44e5-a13c-fc4b00281b72:${FASTA_DIR}/"
globus transfer -r --exclude "reference_proteomes" --exclude "taxonomic_divisions" "47772002-3e5b-4fd3-b97c-18cee38d6df2:/pub/databases/uniprot/current_release/" "4a467fda-f559-4fc3-b54a-e2842f439e06:${FASTA_DIR}/"



0 comments on commit 34d46c8

Please sign in to comment.