Skip to content

Commit 7f7e89b

Browse files
committed
SLURM fixes
1 parent 7f0db0f commit 7f7e89b

File tree

2 files changed

+182
-96
lines changed

2 files changed

+182
-96
lines changed
+83-43
Original file line numberDiff line numberDiff line change
@@ -1,89 +1,129 @@
11
#!/bin/bash
22
#SBATCH -c 64
33
#SBATCH -t 0
4-
#SBATCH -p <insert partition here>
4+
#SBATCH -p eddy
55
#SBATCH --mem=64000
66
#SBATCH -o logs/rna3db_full_release_%j.out
77
#SBATCH -e logs/rna3db_full_release_%j.err
8-
#SBATCH --mail-user=<insert email address here>
8+
#SBATCH --mail-user=marcellszikszai@fas.harvard.edu
99
#SBATCH --mail-type=ALL
1010

1111
# base directory under which the dated release directory is created
12-
OUTPUT_DIR=""
12+
OUTPUT_DIR="/n/eddy_lab/users/mszikszai/rna3db/data/test_auto_release/output"
1313

1414
# set these once; they should not need to change between releases
15-
RNA3DB_ROOT_DIR=""
16-
MMCIF_DIR=""
17-
CMSCAN=""
18-
CMDB=""
15+
RNA3DB_ROOT_DIR="/n/eddy_lab/users/mszikszai/rna3db/"
16+
PDB_MMCIF_DIR="/n/eddy_lab/users/mszikszai/rna3db/data/test_auto_release/old"
17+
CMSCAN="/n/eddy_lab/users/mszikszai/infernal/binaries/cmscan"
18+
CMDB="/n/eddy_lab/users/mszikszai/rna3db-scans/rfams/14.10/Rfam.cm"
1919

20+
# set useful constants
2021
NEW_RELEASE_DATE=$(date +"%Y-%m-%d")
21-
mkdir -p $OUTPUT_DIR/$NEW_RELEASE_DATE
22+
JSON_PATH=$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons
23+
MMCIF_PATH=$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-mmcifs
24+
CMSCAN_PATH=$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans
25+
FASTA_PATH=$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-fastas
26+
UPLOAD_PATH=$OUTPUT_DIR/$NEW_RELEASE_DATE/uploads
27+
28+
# make directories
29+
mkdir -p $JSON_PATH
30+
mkdir -p $MMCIF_PATH
31+
mkdir -p $CMSCAN_PATH
32+
mkdir -p $FASTA_PATH
33+
mkdir -p $UPLOAD_PATH
2234

2335
# prepare the env
2436
mamba activate rna3db
2537

2638
# download latest mmCIF files
27-
bash $RNA3DB_ROOT_DIR/scripts/download_pdb_mmcif.sh $MMCIF_DIR
39+
echo "Downloading..."
40+
#bash $RNA3DB_ROOT_DIR/scripts/download_pdb_mmcif.sh $PDB_MMCIF_DIR
41+
echo "Finished downloading."
2842

2943
# run parse
44+
echo "Parsing..."
3045
python -m rna3db parse \
31-
$MMCIF_DIR \
32-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/parse.json
46+
$PDB_MMCIF_DIR \
47+
$JSON_PATH/parse.json
48+
echo "Finished parsing."
3349

3450
# run filter
35-
python -m rna3db filter $MMCIF_DIR $OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/filter.json
51+
echo "Filtering..."
52+
python -m rna3db filter \
53+
$JSON_PATH/parse.json \
54+
$JSON_PATH/filter.json
55+
echo "Finished filtering."
3656

3757
# write all sequences to a FASTA file
58+
echo "Building FASTA..."
3859
python $RNA3DB_ROOT_DIR/scripts/json_to_fasta.py \
39-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/parse.json \
40-
$OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.fasta
60+
$JSON_PATH/parse.json \
61+
$FASTA_PATH/$NEW_RELEASE_DATE.fasta
62+
echo "Finished building FASTA."
4163

4264
# do cmscan on all new sequences
43-
CMSCAN --cpu 64 \
44-
-o $OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.o \
45-
-tbl $OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.tbl \
65+
echo "Running cmscan..."
66+
$CMSCAN --cpu 64 \
67+
-o $CMSCAN_PATH/$NEW_RELEASE_DATE.o \
68+
--tbl $CMSCAN_PATH/$NEW_RELEASE_DATE.tbl \
4669
$CMDB \
47-
$OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.fasta
70+
$FASTA_PATH/$NEW_RELEASE_DATE.fasta
71+
echo "Finished cmscan."
4872

4973
# find all sequences that did not get a hit
74+
echo "Finding sequences without hits..."
5075
python $RNA3DB_ROOT_DIR/scripts/get_nohits.py \
51-
$OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.fasta \
52-
$OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE-nohits.fasta \
53-
$OUTPUT_DIR/$NEW_RELEASE_DATE/
76+
$FASTA_PATH/$NEW_RELEASE_DATE.fasta \
77+
$FASTA_PATH/$NEW_RELEASE_DATE-nohits.fasta \
78+
$CMSCAN_PATH
79+
echo "Finished finding sequences."
5480

5581
# re-scan, with --max, all sequences that did not get a hit
56-
CMSCAN --max --cpu 64 \
57-
-o $OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE-nohits.o \
58-
-tbl $OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE-nohits.tbl \
82+
echo "Running cmscan (no hits)..."
83+
$CMSCAN --max --cpu 64 \
84+
-o $CMSCAN_PATH/$NEW_RELEASE_DATE-nohits.o \
85+
--tbl $CMSCAN_PATH/$NEW_RELEASE_DATE-nohits.tbl \
5986
$CMDB \
60-
$OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.fasta
87+
$FASTA_PATH/$NEW_RELEASE_DATE-nohits.fasta
88+
echo "Finished cmscan (no hits)."
6189

6290
# run cluster
91+
echo "Clustering..."
6392
python -m rna3db cluster \
64-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/filter.json \
65-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/cluster.json \
66-
--tbl_dir $OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans
93+
$JSON_PATH/filter.json \
94+
$JSON_PATH/cluster.json \
95+
--tbl_dir $CMSCAN_PATH
96+
echo "Cleaning up..."
97+
rm -r $JSON_PATH/mmseqs2_*
98+
echo "Finished clustering."
6799

68100
# run split
101+
echo "Splitting..."
69102
python -m rna3db split \
70-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/cluster.json \
71-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/split.json
103+
$JSON_PATH/cluster.json \
104+
$JSON_PATH/split.json
105+
echo "Finished splitting."
72106

73107
# make mmCIFs
108+
echo "Making mmCIFs..."
74109
python scripts/json_to_mmcif.py \
75-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/split.json \
76-
/Users/marcell/Documents/rna3db/data/pdb_mmcif/ \
77-
$MMCIF_DIR \
78-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-mmcifs
110+
$JSON_PATH/split.json \
111+
$PDB_MMCIF_DIR \
112+
$MMCIF_PATH
113+
echo "Finished writing mmCIFs."
79114

80115
# compress files ready for release
81-
tar -czvf \
82-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans.tar.gz \
83-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans
84-
tar -czvf \
85-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans.tar.gz \
86-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans
87-
tar -cfvJ \
88-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-mmcifs.tar.xz \
89-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-mmcifs
116+
echo "Compressing..."
117+
tar -czf \
118+
$UPLOAD_PATH/rna3db-cmscans.tar.gz \
119+
$CMSCAN_PATH
120+
tar -czf \
121+
$UPLOAD_PATH/rna3db-jsons.tar.gz \
122+
$JSON_PATH
123+
tar -cJf \
124+
$UPLOAD_PATH/rna3db-mmcifs.tar.xz \
125+
$MMCIF_PATH
126+
127+
echo "Finished."
128+
echo $NEW_RELEASE_DATE "release ready for upload."
129+
+99-53
Original file line numberDiff line numberDiff line change
@@ -1,91 +1,137 @@
11
#!/bin/bash
22
#SBATCH -c 64
33
#SBATCH -t 0
4-
#SBATCH -p <insert partition here>
4+
#SBATCH -p eddy
55
#SBATCH --mem=64000
6-
#SBATCH -o logs/rna3db_full_release_%j.out
7-
#SBATCH -e logs/rna3db_full_release_%j.err
8-
#SBATCH --mail-user=<insert email address here>
6+
#SBATCH -o logs/rna3db_incremental_release_%j.out
7+
#SBATCH -e logs/rna3db_incremental_release_%j.err
8+
#SBATCH --mail-user=marcellszikszai@fas.harvard.edu
99
#SBATCH --mail-type=ALL
1010

11-
# the output dir, along with the date of the last release
12-
OUTPUT_DIR=""
13-
PREVIOUS_RELEASE_DATE="2024-04-26"
11+
# base directory under which the dated release directory is created
12+
OUTPUT_DIR="/n/eddy_lab/users/mszikszai/rna3db/data/test_auto_release/output"
13+
# directory of the previous release, which this incremental release builds on
14+
OLD_RELEASE=$OUTPUT_DIR/2024-05-14
1415

1516
# set these once; they should not need to change between releases
16-
RNA3DB_ROOT_DIR=""
17-
MMCIF_DIR=""
18-
CMSCAN=""
19-
CMDB=""
17+
RNA3DB_ROOT_DIR="/n/eddy_lab/users/mszikszai/rna3db/"
18+
PDB_MMCIF_DIR="/n/eddy_lab/users/mszikszai/rna3db/data/test_auto_release/new"
19+
CMSCAN="/n/eddy_lab/users/mszikszai/infernal/binaries/cmscan"
20+
CMDB="/n/eddy_lab/users/mszikszai/rna3db-scans/rfams/14.10/Rfam.cm"
2021

22+
# set useful constants
2123
NEW_RELEASE_DATE=$(date +"%Y-%m-%d")
22-
mkdir -p $OUTPUT_DIR/$NEW_RELEASE_DATE
24+
JSON_PATH=$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons
25+
MMCIF_PATH=$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-mmcifs
26+
CMSCAN_PATH=$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans
27+
FASTA_PATH=$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-fastas
28+
UPLOAD_PATH=$OUTPUT_DIR/$NEW_RELEASE_DATE/uploads
29+
30+
# make directories
31+
mkdir -p $JSON_PATH
32+
mkdir -p $MMCIF_PATH
33+
mkdir -p $CMSCAN_PATH
34+
mkdir -p $FASTA_PATH
35+
mkdir -p $UPLOAD_PATH
2336

2437
# prepare the env
2538
mamba activate rna3db
2639

2740
# download latest mmCIF files
28-
bash $RNA3DB_ROOT_DIR/scripts/download_pdb_mmcif.sh $MMCIF_DIR
41+
echo "Downloading..."
42+
#bash $RNA3DB_ROOT_DIR/scripts/download_pdb_mmcif.sh $PDB_MMCIF_DIR
43+
echo "Finished downloading."
2944

3045
# run parse
46+
echo "Parsing..."
3147
python -m rna3db parse \
32-
$MMCIF_DIR \
33-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/parse.json
48+
$PDB_MMCIF_DIR \
49+
$JSON_PATH/parse.json
50+
echo "Finished parsing."
3451

3552
# run filter
36-
python -m rna3db filter $MMCIF_DIR $OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/filter.json
53+
echo "Filtering..."
54+
python -m rna3db filter \
55+
$JSON_PATH/parse.json \
56+
$JSON_PATH/filter.json
57+
echo "Finished filtering."
3758

3859
# write only the new sequences to a FASTA file
60+
echo "Building FASTA..."
3961
python $RNA3DB_ROOT_DIR/scripts/build_incremental_release_fasta.py \
40-
$OUTPUT_DIR/$OLD_RELEASE_DATE/rna3db-jsons/parse.json \
41-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/parse.json \
42-
$OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.fasta
43-
44-
# do cmscan on all new sequences
45-
CMSCAN --cpu 64 \
46-
-o $OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.o \
47-
-tbl $OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.tbl \
62+
$OLD_RELEASE/rna3db-jsons/parse.json \
63+
$JSON_PATH/parse.json \
64+
$FASTA_PATH/$NEW_RELEASE_DATE.fasta
65+
echo "Finished building FASTA."
66+
67+
echo "Copying old cmscans..."
68+
cp $OLD_RELEASE/rna3db-cmscans/*.tbl $CMSCAN_PATH/
69+
cp $OLD_RELEASE/rna3db-cmscans/*.o $CMSCAN_PATH/
70+
echo "Copied old cmscans."
71+
72+
# do cmscan on new sequences
73+
echo "Running cmscan..."
74+
$CMSCAN --cpu 64 \
75+
-o $CMSCAN_PATH/$NEW_RELEASE_DATE.o \
76+
--tbl $CMSCAN_PATH/$NEW_RELEASE_DATE.tbl \
4877
$CMDB \
49-
$OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.fasta
78+
$FASTA_PATH/$NEW_RELEASE_DATE.fasta
79+
echo "Finished cmscan."
5080

51-
# find new sequences that did not get a hit
81+
# find sequences that did not get a hit
82+
echo "Finding sequences without hits..."
5283
python $RNA3DB_ROOT_DIR/scripts/get_nohits.py \
53-
$OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.fasta \
54-
$OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE-nohits.fasta \
55-
$OUTPUT_DIR/$NEW_RELEASE_DATE/
56-
57-
# re-scan new sequences with --max that did not get a hit
58-
CMSCAN --max --cpu 64 \
59-
-o $OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE-nohits.o \
60-
-tbl $OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE-nohits.tbl \
84+
$FASTA_PATH/$NEW_RELEASE_DATE.fasta \
85+
$FASTA_PATH/$NEW_RELEASE_DATE-nohits.fasta \
86+
$CMSCAN_PATH
87+
echo "Finished finding sequences."
88+
89+
# re-scan, with --max, the sequences that did not get a hit
90+
echo "Running cmscan (no hits)..."
91+
$CMSCAN --max --cpu 64 \
92+
-o $CMSCAN_PATH/$NEW_RELEASE_DATE-nohits.o \
93+
--tbl $CMSCAN_PATH/$NEW_RELEASE_DATE-nohits.tbl \
6194
$CMDB \
62-
$OUTPUT_DIR/$NEW_RELEASE_DATE/$NEW_RELEASE_DATE.fasta
95+
$FASTA_PATH/$NEW_RELEASE_DATE-nohits.fasta
96+
echo "Finished cmscan (no hits)."
6397

6498
# run cluster
99+
echo "Clustering..."
65100
python -m rna3db cluster \
66-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/filter.json \
67-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/cluster.json \
68-
--tbl_dir $OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans
101+
$JSON_PATH/filter.json \
102+
$JSON_PATH/cluster.json \
103+
--tbl_dir $CMSCAN_PATH
104+
echo "Cleaning up..."
105+
rm -r $JSON_PATH/mmseqs2_*
106+
echo "Finished clustering."
69107

70108
# run split
109+
echo "Splitting..."
71110
python -m rna3db split \
72-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/cluster.json \
73-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/split.json
111+
$JSON_PATH/cluster.json \
112+
$JSON_PATH/split.json
113+
echo "Finished splitting."
74114

75115
# make mmCIFs
116+
echo "Making mmCIFs..."
76117
python scripts/json_to_mmcif.py \
77-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-jsons/split.json \
78-
/Users/marcell/Documents/rna3db/data/pdb_mmcif/ \
79-
$MMCIF_DIR \
80-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-mmcifs
118+
$JSON_PATH/split.json \
119+
$PDB_MMCIF_DIR \
120+
$MMCIF_PATH
121+
echo "Finished writing mmCIFs."
81122

82123
# compress files ready for release
83-
tar -czvf \
84-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans.tar.gz \
85-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans
86-
tar -czvf \
87-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans.tar.gz \
88-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-cmscans
89-
tar -cfvJ \
90-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-mmcifs.tar.xz \
91-
$OUTPUT_DIR/$NEW_RELEASE_DATE/rna3db-mmcifs
124+
echo "Compressing..."
125+
tar -czf \
126+
$UPLOAD_PATH/rna3db-cmscans.tar.gz \
127+
$CMSCAN_PATH
128+
tar -czf \
129+
$UPLOAD_PATH/rna3db-jsons.tar.gz \
130+
$JSON_PATH
131+
tar -cJf \
132+
$UPLOAD_PATH/rna3db-mmcifs.tar.xz \
133+
$MMCIF_PATH
134+
135+
echo "Finished."
136+
echo $NEW_RELEASE_DATE "release ready for upload."
137+

0 commit comments

Comments
 (0)