-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updated scripts to use globus commands
Updated extraction scripts. Updated pfam. Updated interpro. Updated uniprot. Updated ena.
- Loading branch information
Data Mover
committed
Sep 23, 2024
1 parent
b56160d
commit ebb8ae1
Showing
10 changed files
with
221 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# EasyBuild easyconfig for the ENA reference data set.
# Nothing is downloaded or built here (no sources, no dependencies); the
# file only describes a module for data expected under the install dir.
easyblock = 'Bundle'

name = 'ena'
version = '20240909'

homepage = 'https://www.ebi.ac.uk/ena'
description = (
    "The European Nucleotide Archive (ENA) captures and presents information "
    "relating to experimental workflows that are based around nucleotide "
    "sequencing. "
)

# SYSTEM is supplied by the EasyBuild framework when it parses this file.
toolchain = SYSTEM

# No source files are listed, so the (empty) source URL is never fetched.
source_urls = ['']
sources = []

dependencies = []

# The installation is accepted only if these directories exist.
sanity_check_paths = {'files': [], 'dirs': ['sequence', 'wgs']}

# Empty relative path for BIODB; presumably resolves to the install
# directory root -- confirm against the EasyBuild modextrapaths docs.
modextrapaths = {'BIODB': ''}

keeppreviousinstall = True
moduleclass = 'data'

# Message printed when the module is loaded.
modloadmsg = "%(name)s/%(version)s database is located at %(installdir)s\n"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/bin/bash
# ----------------SLURM Parameters----------------
#SBATCH -p admin
#SBATCH -n 1
#SBATCH --mem=20g
#SBATCH -N 1
#SBATCH --mail-user=datamover@igb.illinois.edu
#SBATCH --mail-type=ALL
#SBATCH -J ena_extract
#SBATCH -D /home/a-m/datamover/jobs
#SBATCH -o %x-%j.out
# ----------------Load Modules--------------------
module load pigz/2.4-IGB-gcc-8.2.0
# ----------------Commands------------------------
#
# Decompress every *.gz file of a mirrored ENA release in place, then
# normalise directory/file permissions.
#
# Usage:     sbatch <this script> VERSION
# Arguments: $1 - release version; selects
#                 /private_stores/mirror/ena/VERSION
# Exit:      0 on success, 1 on a missing argument, a missing mirror
#            directory, or the first file that fails to decompress.
#

DATABASE="ena"

if [ -z "$1" ]; then
    echo "Please specify ${DATABASE} version number"
    exit 1
fi

VERSION=$1
MIRROR_DIR="/private_stores/mirror/${DATABASE}/${VERSION}"

# Timestamp prefix shared by every log line.
timestamp() {
    date "+%Y-%m-%d %k:%M:%S"
}

# Fail early instead of letting every later find report the same error.
if [ ! -d "${MIRROR_DIR}" ]; then
    echo "$(timestamp) Mirror directory not found: ${MIRROR_DIR}"
    exit 1
fi

echo "$(timestamp) Extracting Files"
# NUL-delimited find output keeps paths containing whitespace or glob
# characters intact; -type f skips directories that happen to end in .gz.
while IFS= read -r -d '' f; do
    if ! gunzip -- "$f"; then
        echo "$(timestamp) Error extracting file: $f"
        exit 1
    fi
    echo "$(timestamp) Done extracting file: $f"
done < <(find "${MIRROR_DIR}" -type f -name '*.gz' -print0)

echo "$(timestamp) Fix Permissions Start"
# '+' batches many paths into each chmod call instead of one per path.
find "${MIRROR_DIR}" -type d -exec chmod 775 {} +
find "${MIRROR_DIR}" -type f -exec chmod 664 {} +
echo "$(timestamp) Fix Permissions Completed"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# EasyBuild easyconfig for the InterPro reference data set.
# Nothing is downloaded or built here (no sources, no dependencies); the
# file only describes a module for data expected under the install dir.
easyblock = 'Bundle'

name = 'interpro'
version = '101.0'

homepage = 'https://www.ebi.ac.uk/interpro/'
description = (
    "InterPro provides functional analysis of proteins by classifying them "
    "into families and predicting domains and important sites."
)

# SYSTEM is supplied by the EasyBuild framework when it parses this file.
toolchain = SYSTEM

# No source files are listed, so the (empty) source URL is never fetched.
source_urls = ['']
sources = []

dependencies = []

# The installation is accepted only if this file exists.
sanity_check_paths = {'files': ['interpro.xml'], 'dirs': []}

# Empty relative path for BIODB; presumably resolves to the install
# directory root -- confirm against the EasyBuild modextrapaths docs.
modextrapaths = {'BIODB': ''}

keeppreviousinstall = True
moduleclass = 'data'

# Message printed when the module is loaded.
modloadmsg = "%(name)s/%(version)s database is located at %(installdir)s\n"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#!/bin/bash
# ----------------SLURM Parameters----------------
#SBATCH -p admin
#SBATCH -n 4
#SBATCH --mem=20g
#SBATCH -N 1
#SBATCH --mail-user=datamover@igb.illinois.edu
#SBATCH --mail-type=ALL
#SBATCH -J interpro_extract
#SBATCH -D /home/a-m/datamover/jobs
#SBATCH -o %x-%j.out
# ----------------Load Modules--------------------
module load pigz/2.4-IGB-gcc-8.2.0
# ----------------Commands------------------------
#
# Unpack every *.tar.gz archive of a mirrored InterPro release next to
# the archive itself, decompress the remaining gzip files with pigz, then
# normalise directory/file permissions.
#
# Usage:     sbatch <this script> VERSION
# Arguments: $1 - release version; selects
#                 /private_stores/mirror/interpro/VERSION
# Exit:      0 on success, 1 on a missing argument, a missing mirror
#            directory or a failed tar extraction, otherwise the exit
#            status of pigz when decompression fails.
#

DATABASE="interpro"

if [ -z "$1" ]; then
    echo "Please specify ${DATABASE} version number"
    exit 1
fi

VERSION=$1
MIRROR_DIR="/private_stores/mirror/${DATABASE}/${VERSION}"

# Timestamp prefix shared by every log line.
timestamp() {
    date "+%Y-%m-%d %k:%M:%S"
}

# Fail early instead of letting every later command report the same error.
if [ ! -d "${MIRROR_DIR}" ]; then
    echo "$(timestamp) Mirror directory not found: ${MIRROR_DIR}"
    exit 1
fi

echo "$(timestamp) Extracting Files"
# NUL-delimited find output keeps paths containing whitespace or glob
# characters intact.
while IFS= read -r -d '' f; do
    # Extract into the directory that holds the archive. The previous
    # "-C dirname $f" passed the literal word "dirname" as the target
    # directory and the archive path as a member name.
    if ! tar -xvzf "$f" -C "$(dirname -- "$f")"; then
        echo "$(timestamp) Error extracting file: $f"
        exit 1
    fi
    echo "$(timestamp) Done extracting file: $f"
done < <(find "${MIRROR_DIR}" -type f -name '*.tar.gz' -print0)

# Recursively decompress what is left, one pigz thread per SLURM task
# (falls back to a single thread when run outside of SLURM).
# NOTE(review): this pass also decompresses the *.tar.gz archives that
# were just unpacked, leaving plain .tar files behind -- confirm that is
# intended.
pigz -p "${SLURM_NTASKS:-1}" -dr "${MIRROR_DIR}"
pigz_status=$?
if [ "${pigz_status}" -ne 0 ]; then
    echo "$(timestamp) Extracting files Failed"
    # Use the saved status: $? would now be the exit code of echo (0).
    exit "${pigz_status}"
else
    echo "$(timestamp) Extracting Files Complete"
fi

echo "$(timestamp) Fix Permissions Start"
# '+' batches many paths into each chmod call instead of one per path.
find "${MIRROR_DIR}" -type d -exec chmod 775 {} +
find "${MIRROR_DIR}" -type f -exec chmod 664 {} +
echo "$(timestamp) Fix Permissions Completed"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# EasyBuild easyconfig for the Pfam reference data set.
# Nothing is downloaded or built here (no sources, no dependencies); the
# file only describes a module for data expected under the install dir.
easyblock = 'Bundle'

name = 'pfam'
version = '37.0'

homepage = 'https://pfam.xfam.org/'
description = (
    "The Pfam database is a large collection of protein families, each "
    "represented by multiple sequence alignments and hidden Markov models (HMMs)"
)

# SYSTEM is supplied by the EasyBuild framework when it parses this file.
toolchain = SYSTEM

# No source files are listed, so the (empty) source URL is never fetched.
source_urls = ['']
sources = []

dependencies = []

# The installation is accepted only if this file exists.
sanity_check_paths = {'files': ['Pfam-A.fasta'], 'dirs': []}

# Empty relative path for BIODB; presumably resolves to the install
# directory root -- confirm against the EasyBuild modextrapaths docs.
modextrapaths = {'BIODB': ''}

keeppreviousinstall = True
moduleclass = 'data'

# Message printed when the module is loaded.
modloadmsg = "%(name)s/%(version)s database and indexes are located at %(installdir)s\n"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters