Patch summary: add ena/interpro/pfam easyconfigs and ena_extract.sh, switch
the interpro/pfam extract scripts to per-file gunzip/tar loops, change the
Globus destination endpoint used by ena_download.sh, and drop the BLAST v4
index from the uniprot easyconfig and index script.

Review changes folded into the added (+) lines:
  * pfam_extract.sh: the failure branches now `exit 1`. The submitted
    `exit $?` ran after `echo`, so it returned echo's status (0) and a failed
    extraction ended the job as a success.
  * ena/interpro/pfam extract scripts: expansions passed to find, gunzip, tar
    and dirname are double-quoted.
  * ena_extract.sh: the leftover template comment is replaced by a usage note.

Hunk line counts are unchanged. The post-image blob ids on the `index` lines
of the edited scripts no longer match their content. Indentation and blank
context lines inside hunks were re-created from a whitespace-collapsed copy;
try `git apply --ignore-whitespace` if context lines do not match.

diff --git a/easyconfigs/e/ena/ena-20240909.eb b/easyconfigs/e/ena/ena-20240909.eb
new file mode 100644
index 0000000..4f29f19
--- /dev/null
+++ b/easyconfigs/e/ena/ena-20240909.eb
@@ -0,0 +1,28 @@
+easyblock = 'Bundle'
+
+name = 'ena'
+version = '20240909'
+
+homepage = 'https://www.ebi.ac.uk/ena'
+description = """The European Nucleotide Archive (ENA) captures and presents information relating to experimental workflows that are based around nucleotide sequencing. """
+
+toolchain = SYSTEM
+
+source_urls = ['']
+sources = []
+
+dependencies = []
+
+sanity_check_paths = {
+    'files': [],
+    'dirs': ['sequence','wgs'],
+}
+
+modextrapaths = {'BIODB':''
+}
+
+keeppreviousinstall = True
+moduleclass = 'data'
+
+modloadmsg = "%(name)s/%(version)s database is located at %(installdir)s\n"
+
diff --git a/easyconfigs/e/ena/ena_download.sh b/easyconfigs/e/ena/ena_download.sh
index c832644..2cfcb56 100755
--- a/easyconfigs/e/ena/ena_download.sh
+++ b/easyconfigs/e/ena/ena_download.sh
@@ -17,8 +17,8 @@ echo "Downloading Files: `date "+%Y-%m-%d %k:%M:%S"`"
 #mkdir ${MIRROR_DIR}/wgs
 #mkdir ${MIRROR_DIR}/sequence
 
-globus transfer --preserve-timestamp --skip-source-errors --delete -r "47772002-3e5b-4fd3-b97c-18cee38d6df2:/pub/databases/ena/wgs/" "1ccc563b-0542-44e5-a13c-fc4b00281b72:${MIRROR_DIR}/wgs/"
-globus transfer --preserve-timestamp --skip-source-errors --delete -r "47772002-3e5b-4fd3-b97c-18cee38d6df2:/pub/databases/ena/sequence/snapshot_latest/" "1ccc563b-0542-44e5-a13c-fc4b00281b72:${MIRROR_DIR}/sequence/"
+globus transfer --preserve-timestamp --skip-source-errors --delete -r "47772002-3e5b-4fd3-b97c-18cee38d6df2:/pub/databases/ena/wgs/" "4a467fda-f559-4fc3-b54a-e2842f439e06:${MIRROR_DIR}/wgs/"
+globus transfer --preserve-timestamp --skip-source-errors --delete -r "47772002-3e5b-4fd3-b97c-18cee38d6df2:/pub/databases/ena/sequence/snapshot_latest/" "4a467fda-f559-4fc3-b54a-e2842f439e06:${MIRROR_DIR}/sequence/"
 
 
 
diff --git a/easyconfigs/e/ena/ena_extract.sh b/easyconfigs/e/ena/ena_extract.sh
new file mode 100755
index 0000000..0f2e56c
--- /dev/null
+++ b/easyconfigs/e/ena/ena_extract.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# ----------------SLURM Parameters----------------
+#SBATCH -p admin
+#SBATCH -n 1
+#SBATCH --mem=20g
+#SBATCH -N 1
+#SBATCH --mail-user=datamover@igb.illinois.edu
+#SBATCH --mail-type=ALL
+#SBATCH -J ena_extract
+#SBATCH -D /home/a-m/datamover/jobs
+#SBATCH -o %x-%j.out
+# ----------------Load Modules--------------------
+module load pigz/2.4-IGB-gcc-8.2.0
+# ----------------Commands------------------------
+#
+# Usage: ena_extract.sh <version>
+# Gunzips every *.gz under /private_stores/mirror/ena/<version>, then resets permissions.
+#
+
+DATABASE="ena"
+
+if [ -z "$1" ];
+then
+    echo "Please specify ${DATABASE} version number";
+    exit 1;
+fi
+
+VERSION=$1
+MIRROR_DIR=/private_stores/mirror/${DATABASE}/${VERSION}
+
+
+echo "`date "+%Y-%m-%d %k:%M:%S"` Extracting Files"
+for f in $(find "${MIRROR_DIR}" -name '*.gz');
+do
+    gunzip "$f"
+    if [ $? -ne 0 ]; then
+        echo "`date "+%Y-%m-%d %k:%M:%S"` Error extracting file: $f"
+        exit 1
+    else
+        echo "`date "+%Y-%m-%d %k:%M:%S"` Done extracting file: $f"
+    fi
+done
+
+echo "`date "+%Y-%m-%d %k:%M:%S"` Fix Permissions Start"
+find "${MIRROR_DIR}" -type d -exec chmod 775 {} \;
+find "${MIRROR_DIR}" -type f -exec chmod 664 {} \;
+echo "`date "+%Y-%m-%d %k:%M:%S"` Fix Permissions Completed"
+
diff --git a/easyconfigs/i/interpro/interpro-101.0.eb b/easyconfigs/i/interpro/interpro-101.0.eb
new file mode 100644
index 0000000..6239019
--- /dev/null
+++ b/easyconfigs/i/interpro/interpro-101.0.eb
@@ -0,0 +1,28 @@
+easyblock = 'Bundle'
+
+name = 'interpro'
+version = '101.0'
+
+homepage = 'https://www.ebi.ac.uk/interpro/'
+description = """InterPro provides functional analysis of proteins by classifying them into families and predicting domains and important sites."""
+
+toolchain = SYSTEM
+
+source_urls = ['']
+sources = []
+
+dependencies = []
+
+sanity_check_paths = {
+    'files': ['interpro.xml'],
+    'dirs': [],
+}
+
+modextrapaths = {'BIODB':''
+}
+
+keeppreviousinstall = True
+moduleclass = 'data'
+
+modloadmsg = "%(name)s/%(version)s database is located at %(installdir)s\n"
+
diff --git a/easyconfigs/i/interpro/interpro_extract.sh b/easyconfigs/i/interpro/interpro_extract.sh
index 5476c88..544a4b6 100755
--- a/easyconfigs/i/interpro/interpro_extract.sh
+++ b/easyconfigs/i/interpro/interpro_extract.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # ----------------SLURM Parameters----------------
 #SBATCH -p admin
-#SBATCH -n 4
+#SBATCH -n 1
 #SBATCH --mem=20g
 #SBATCH -N 1
 #SBATCH --mail-user=datamover@igb.illinois.edu
@@ -30,9 +30,9 @@ MIRROR_DIR=/private_stores/mirror/${DATABASE}/${VERSION}
 
 
 echo "`date "+%Y-%m-%d %k:%M:%S"` Extracting Files"
-for f in $(find ${MIRROR_DIR} -name '*.tar.gz');
+for f in $(find "${MIRROR_DIR}" -name '*.gz');
 do
-    tar -xvzf $f -C dirname $f
+    gunzip "$f"
     if [ $? -ne 0 ]; then
         echo "`date "+%Y-%m-%d %k:%M:%S"` Error extracting file: $f"
         exit 1
@@ -41,15 +41,6 @@ do
     fi
 done
 
-pigz -p $SLURM_NTASKS -dr $MIRROR_DIR
-if [ $? -ne 0 ]
-then
-    echo "`date "+%Y-%m-%d %k:%M:%S"` Extracting files Failed"
-    exit $?
-else
-    echo "`date "+%Y-%m-%d %k:%M:%S"` Extracting Files Complete"
-fi
-
 echo "`date "+%Y-%m-%d %k:%M:%S"` Fix Permissions Start"
 find ${MIRROR_DIR} -type d -exec chmod 775 {} \;
 find ${MIRROR_DIR} -type f -exec chmod 664 {} \;
diff --git a/easyconfigs/i/interpro/interpro_extract.sh.backup b/easyconfigs/i/interpro/interpro_extract.sh.backup
new file mode 100755
index 0000000..5476c88
--- /dev/null
+++ b/easyconfigs/i/interpro/interpro_extract.sh.backup
@@ -0,0 +1,57 @@
+#!/bin/bash
+# ----------------SLURM Parameters----------------
+#SBATCH -p admin
+#SBATCH -n 4
+#SBATCH --mem=20g
+#SBATCH -N 1
+#SBATCH --mail-user=datamover@igb.illinois.edu
+#SBATCH --mail-type=ALL
+#SBATCH -J interpro_extract
+#SBATCH -D /home/a-m/datamover/jobs
+#SBATCH -o %x-%j.out
+# ----------------Load Modules--------------------
+module load pigz/2.4-IGB-gcc-8.2.0
+# ----------------Commands------------------------
+#
+# Replace DATABASE with name of database you are downloading
+# Replace WEBSITE with remote location of database#
+#
+
+DATABASE="interpro"
+
+if [ -z "$1" ];
+then
+    echo "Please specify ${DATABASE} version number";
+    exit 1;
+fi
+
+VERSION=$1
+MIRROR_DIR=/private_stores/mirror/${DATABASE}/${VERSION}
+
+
+echo "`date "+%Y-%m-%d %k:%M:%S"` Extracting Files"
+for f in $(find ${MIRROR_DIR} -name '*.tar.gz');
+do
+    tar -xvzf $f -C dirname $f
+    if [ $? -ne 0 ]; then
+        echo "`date "+%Y-%m-%d %k:%M:%S"` Error extracting file: $f"
+        exit 1
+    else
+        echo "`date "+%Y-%m-%d %k:%M:%S"` Done extracting file: $f"
+    fi
+done
+
+pigz -p $SLURM_NTASKS -dr $MIRROR_DIR
+if [ $? -ne 0 ]
+then
+    echo "`date "+%Y-%m-%d %k:%M:%S"` Extracting files Failed"
+    exit $?
+else
+    echo "`date "+%Y-%m-%d %k:%M:%S"` Extracting Files Complete"
+fi
+
+echo "`date "+%Y-%m-%d %k:%M:%S"` Fix Permissions Start"
+find ${MIRROR_DIR} -type d -exec chmod 775 {} \;
+find ${MIRROR_DIR} -type f -exec chmod 664 {} \;
+echo "`date "+%Y-%m-%d %k:%M:%S"` Fix Permissions Completed"
+
diff --git a/easyconfigs/p/pfam/pfam-37.0.eb b/easyconfigs/p/pfam/pfam-37.0.eb
new file mode 100644
index 0000000..2a16d17
--- /dev/null
+++ b/easyconfigs/p/pfam/pfam-37.0.eb
@@ -0,0 +1,28 @@
+easyblock = 'Bundle'
+
+name = 'pfam'
+version = '37.0'
+
+homepage = 'https://pfam.xfam.org/'
+description = """The Pfam database is a large collection of protein families, each represented by multiple sequence alignments and hidden Markov models (HMMs)"""
+
+toolchain = SYSTEM
+
+source_urls = ['']
+sources = []
+
+dependencies = []
+
+sanity_check_paths = {
+    'files': ['Pfam-A.fasta'],
+    'dirs': [],
+}
+
+modextrapaths = {'BIODB':''
+}
+
+keeppreviousinstall = True
+moduleclass = 'data'
+
+modloadmsg = "%(name)s/%(version)s database and indexes are located at %(installdir)s\n"
+
diff --git a/easyconfigs/p/pfam/pfam_extract.sh b/easyconfigs/p/pfam/pfam_extract.sh
index a5cf055..d5f8b49 100755
--- a/easyconfigs/p/pfam/pfam_extract.sh
+++ b/easyconfigs/p/pfam/pfam_extract.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # ----------------SLURM Parameters----------------
 #SBATCH -p admin
-#SBATCH -n 4
+#SBATCH -n 1
 #SBATCH --mem=20g
 #SBATCH -N 1
 #SBATCH --mail-user=datamover@igb.illinois.edu
@@ -10,7 +10,7 @@
 #SBATCH -D /home/a-m/datamover/jobs
 #SBATCH -o %x-%j.out
 # ----------------Load Modules--------------------
-module load pigz/2.4-IGB-gcc-8.2.0
+
 # ----------------Commands------------------------
 
 if [ -z "$1" ];
@@ -25,14 +25,29 @@ MIRROR_DIR=/private_stores/mirror/pfam/${VERSION}
 
 
 echo "`date "+%Y-%m-%d %k:%M:%S"` Extracting Files"
-pigz -p $SLURM_NTASKS -dr $MIRROR_DIR
-if [ $? -ne 0 ]
-then
-    echo "`date "+%Y-%m-%d %k:%M:%S"` Extracting files Failed"
-    exit 1
-else
-    echo "Extracting Files Complete: `date "+%Y-%m-%d %k:%M:%S"`"
-fi
+echo "`date "+%Y-%m-%d %k:%M:%S"` Extracting .gz Files"
+for f in $(find "${MIRROR_DIR}" -name '*.gz');
+do
+    gunzip "$f"
+    if [ $? -ne 0 ]; then
+        echo "`date "+%Y-%m-%d %k:%M:%S"` Error extracting file: $f"
+        exit 1  # literal status: $? here would be echo's 0, not gunzip's
+    else
+        echo "`date "+%Y-%m-%d %k:%M:%S"` Done extracting file: $f"
+    fi
+done
+
+echo "`date "+%Y-%m-%d %k:%M:%S"` Extracting .tgz Files"
+for f in $(find "${MIRROR_DIR}" -name '*.tgz');
+do
+    tar -xvzf "$f" -C "$(dirname "$f")"
+    if [ $? -ne 0 ]; then
+        echo "`date "+%Y-%m-%d %k:%M:%S"` Error extracting file: $f"
+        exit 1  # literal status: $? here would be echo's 0, not tar's
+    else
+        echo "`date "+%Y-%m-%d %k:%M:%S"` Done extracting file: $f"
+    fi
+done
 
 echo "`date "+%Y-%m-%d %k:%M:%S"` Fix Permissions Start"
 find $MIRROR_DIR -type d -exec chmod 775 {} \;
diff --git a/easyconfigs/u/uniprot/uniprot_2024_04.eb b/easyconfigs/u/uniprot/uniprot_2024_04.eb
index 88b7691..12a3d74 100644
--- a/easyconfigs/u/uniprot/uniprot_2024_04.eb
+++ b/easyconfigs/u/uniprot/uniprot_2024_04.eb
@@ -20,11 +20,11 @@ sanity_check_paths = {
         'db/uniref/uniref90/uniref90.fasta',
         'db/uniref/uniref50/uniref50.fasta'
     ],
-    'dirs': ['db','blastdb_v4','blastdb_v5','diamond']
+    'dirs': ['db','blastdb_v5','diamond']
 }
 
 modextrapaths = {'BIODB':'db',
-    'BLASTDB': ['blastdb_v4','blastdb_v5'],
+    'BLASTDB': ['blastdb_v5'],
     'DIAMONDDB': 'diamond',
 }
 
diff --git a/easyconfigs/u/uniprot/uniprot_indexes.sh b/easyconfigs/u/uniprot/uniprot_indexes.sh
index 8e71a28..4f2bc06 100755
--- a/easyconfigs/u/uniprot/uniprot_indexes.sh
+++ b/easyconfigs/u/uniprot/uniprot_indexes.sh
@@ -25,7 +25,6 @@ fi
 VERSION=$1
 MIRROR_DIR=/private_stores/mirror/uniprot
 FASTA_DIR=$MIRROR_DIR/$VERSION/db
-BLASTV4_DIR=$MIRROR_DIR/$VERSION/blastdb_v4
 BLASTV5_DIR=$MIRROR_DIR/$VERSION/blastdb_v5
 DIAMOND_DIR=$MIRROR_DIR/$VERSION/diamond
 DIAMOND_OPTS="--quiet --threads $SLURM_NTASKS"
@@ -39,7 +38,6 @@ fi
 
 
 echo "`date "+%Y-%m-%d %k:%M:%S"` Creating Directories"
-mkdir -p $BLASTV4_DIR
 mkdir -p $BLASTV5_DIR
 mkdir -p $DIAMOND_DIR
 
@@ -50,17 +48,6 @@ for f in ${FASTA_FILES[@]}; do
     FASTA_NAME=`basename $f`
     DB_NAME=`basename $f .fasta`
 
-    #Make blast v4 indexes
-    echo "`date "+%Y-%m-%d %k:%M:%S"` Creating Blast v4 Index for File: $FULL_PATH"
-
-    makeblastdb -dbtype prot -title $DB_NAME -in $FULL_PATH -out $BLASTV4_DIR/$DB_NAME -blastdb_version 4
-    if [ $? -ne 0 ]; then
-        echo "`date "+%Y-%m-%d %k:%M:%S"` Error creating Blast v4 index for file: $FULL_PATH"
-        exit 1
-    else
-        echo "`date "+%Y-%m-%d %k:%M:%S"` Done Creating Blast v4 Index for File: $FULL_PATH"
-    fi
-
     #Make blast v5 indexes
     echo "`date "+%Y-%m-%d %k:%M:%S"` Creating Blast v5 Index for File: $FULL_PATH"
     makeblastdb -dbtype prot -title $DB_NAME -in $FULL_PATH -out $BLASTV5_DIR/$DB_NAME -blastdb_version 5