-
Notifications
You must be signed in to change notification settings - Fork 1
/
cfold_jackhmmer.sh
151 lines (130 loc) · 6.16 KB
/
cfold_jackhmmer.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/bin/bash
# Description: AlphaFold non-docker version
# Author: Sanjay Kumar Srikakulam
# Edited by: James Wellnitz
# Print the command-line help (required and optional parameters) to stdout
# and terminate the script with exit status 1.
# Takes no arguments; $0 is interpolated into the "Usage:" line.
usage() {
  # Unquoted delimiter on purpose: $0 must expand inside the help text.
  cat <<EOF

Please make sure all required parameters are given
Usage: $0 <OPTIONS>
Required Parameters:
-y <python> path to python3 binary
-d <data_dir> Path to directory of supporting data
-o <output_dir> Path to a directory that will store the results.
-m <model_names> Names of models to use (a comma separated list)
-f <fasta_path> Path to a FASTA file containing one sequence
-t <max_template_date> Maximum template release date to consider (ISO-8601 format - i.e. YYYY-MM-DD). Important if folding historical test sets
Optional Parameters:
-n <openmm_threads> OpenMM threads (default: all available cores)
-b <benchmark> Run multiple JAX model evaluations to obtain a timing that excludes the compilation time, which should be more indicative of the time required for inferencing many proteins (default: 'False')
-g <use_gpu> Enable NVIDIA runtime to run with GPUs (default: True)
-a <gpu_devices> Comma separated list of devices to pass to 'CUDA_VISIBLE_DEVICES' (default: 0)
-p <preset> Choose preset model configuration - no ensemble and small genetic database config (reduced_dbs) - no ensembling and full genetic database config (full_dbs) or full genetic database config and 8 model ensemblings (casp14)

EOF
  exit 1
}
# Parse the command-line options into the global variables read by the rest
# of the script (data_dir, output_dir, model_names, fasta_path,
# max_template_date, use_gpu, openmm_threads, gpu_devices, preset, benchmark,
# python_bin).
# Arguments: the script's positional parameters ("$@").
#
# Every option takes a value. Note that -b always consumes the following
# word as its value: "false", "f" or "0" (any letter case) switch
# benchmarking off, any other value switches it on. A trailing "-b" with no
# value is accepted as a plain "on" switch.
# Problems are reported on stderr without aborting; the required-parameter
# check that follows option parsing decides whether the run can proceed.
parse_cli_options() {
  # Keep getopts state local so the function can be called more than once.
  local OPTIND=1 OPTARG opt
  # Leading ':' selects silent mode: a missing argument yields opt=':' and an
  # unknown option yields opt='?', both with the option letter in OPTARG.
  while getopts ":d:o:m:f:t:g:n:a:p:b:y:" opt; do
    case "$opt" in
      d) data_dir=$OPTARG ;;
      o) output_dir=$OPTARG ;;
      m) model_names=$OPTARG ;;
      f) fasta_path=$OPTARG ;;
      t) max_template_date=$OPTARG ;;
      g) use_gpu=$OPTARG ;;
      n) openmm_threads=$OPTARG ;;
      a) gpu_devices=$OPTARG ;;
      p) preset=$OPTARG ;;
      b)
        case "$OPTARG" in
          [Ff][Aa][Ll][Ss][Ee] | [Ff] | 0) benchmark=false ;;
          *) benchmark=true ;;
        esac
        ;;
      y) python_bin=$OPTARG ;;
      :)
        if [[ "$OPTARG" == "b" ]]; then
          benchmark=true
        else
          echo "Warning: option -$OPTARG requires an argument; ignoring it" >&2
        fi
        ;;
      \?)
        echo "Warning: unknown option -$OPTARG ignored" >&2
        ;;
    esac
  done
}
parse_cli_options "$@"
# Validate the parsed options and fill in defaults.
# The five required values must all be non-empty; otherwise the help text is
# shown via usage.
if ! [[ -n "$data_dir" && -n "$output_dir" && -n "$model_names" && -n "$fasta_path" && -n "$max_template_date" ]]; then
  usage
fi

# Optional values: ':=' assigns the default when the variable is unset or empty.
: "${benchmark:=false}"
: "${use_gpu:=true}"
: "${gpu_devices:=0}"
: "${preset:=full_dbs}"

# Anything other than the three known presets falls back to full_dbs.
case "$preset" in
  full_dbs | casp14 | reduced_dbs) ;;
  *)
    echo "Unknown preset! Using default ('full_dbs')"
    preset="full_dbs"
    ;;
esac
# Create (if necessary) the per-job result directory
# "<output root>/<FASTA file name>" and print its path.
# Arguments: $1 - output root directory
#            $2 - path to the FASTA file; only its base name is used
# Outputs:   the result directory path on stdout
# Returns:   non-zero when the directory cannot be created
prepare_result_dir() {
  local job_name
  # Quoted and '--'-protected so paths with spaces or a leading '-' survive.
  job_name=$(basename -- "$2") || return 1
  # -p also creates a missing output root and is a no-op for an existing dir.
  mkdir -p -- "$1/$job_name" || return 1
  printf '%s\n' "$1/$job_name"
}
jobname=$(basename -- "$fasta_path")
if ! result_dir=$(prepare_result_dir "$output_dir" "$fasta_path"); then
  echo "Could not create result directory $output_dir/$jobname" >&2
  exit 1
fi
# run_alphafold.py is expected in the directory this script is started from;
# stop with exit status 1 when it is not there.
current_working_dir=$(pwd)
alphafold_script="${current_working_dir}/run_alphafold.py"
[[ -f "$alphafold_script" ]] || {
  echo "Alphafold python script $alphafold_script does not exist."
  exit 1
}
# Export ENVIRONMENT variables and set CUDA devices for use
# CUDA GPU control

# Print the value to export as CUDA_VISIBLE_DEVICES.
# Arguments: $1 - GPU switch; "true" in any letter case requests GPU use
#                 (the help text spells the default as "True")
#            $2 - comma separated device list; 0 is used when it is empty
# Outputs:   the device list, or -1 when GPU use was not requested
cuda_visible_devices_for() {
  case "$1" in
    [Tt][Rr][Uu][Ee]) printf '%s\n' "${2:-0}" ;;
    *) printf '%s\n' "-1" ;;
  esac
}
CUDA_VISIBLE_DEVICES=$(cuda_visible_devices_for "$use_gpu" "$gpu_devices")
export CUDA_VISIBLE_DEVICES
# OpenMM threads control: OPENMM_CPU_THREADS is exported only when a thread
# count was supplied; otherwise the variable is left untouched.
[[ -z "$openmm_threads" ]] || export OPENMM_CPU_THREADS="$openmm_threads"
# TensorFlow control (TF_FORCE_UNIFIED_MEMORY) and
# JAX control (XLA_PYTHON_CLIENT_MEM_FRACTION)
export TF_FORCE_UNIFIED_MEMORY='1' XLA_PYTHON_CLIENT_MEM_FRACTION='4.0'
# Path and user config (change me if required)
# Every database location is derived from the -d <data_dir> directory.
bfd_database_path="${data_dir}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"
small_bfd_database_path="${data_dir}/small_bfd/bfd-first_non_consensus_sequences.fasta"
mgnify_database_path="${data_dir}/mgnify/mgy_clusters.fa"
template_mmcif_dir="${data_dir}/pdb_mmcif/mmcif_files"
obsolete_pdbs_path="${data_dir}/pdb_mmcif/obsolete.dat"
pdb70_database_path="${data_dir}/pdb70/pdb70"
uniclust30_database_path="${data_dir}/uniclust30/uniclust30_2018_08/uniclust30_2018_08"
uniref90_database_path="${data_dir}/uniref90/uniref90.fasta"
# Binary path (change me if required)
# All four tools are taken from the cfold-conda/bin directory below the
# working directory; a single variable keeps their paths consistent.
conda_bin_dir="${current_working_dir}/cfold-conda/bin"
hhblits_binary_path="${conda_bin_dir}/hhblits"
hhsearch_binary_path="${conda_bin_dir}/hhsearch"
jackhmmer_binary_path="${conda_bin_dir}/jackhmmer"
kalign_binary_path="${conda_bin_dir}/kalign"
# Run AlphaFold with required parameters

# Fill the global array alphafold_command with the complete argv for the
# run_alphafold.py invocation.
# Globals read: python_bin, alphafold_script, preset, benchmark, the
#   *_binary_path and *_database_path variables, template_mmcif_dir,
#   obsolete_pdbs_path, data_dir, result_dir, fasta_path, model_names,
#   max_template_date.
# Globals written: alphafold_command.
# The interpreter is the -y <python> value when one was given, otherwise the
# bundled ./cfold-conda/bin/python3.7.
# The "reduced_dbs" preset passes the small BFD database and no uniclust30
# database; every other preset passes the full BFD and uniclust30 databases.
# Each argument is a separate, quoted array element so that values containing
# spaces or glob characters reach the Python script unchanged.
build_alphafold_command() {
  local -a bfd_flags=() uniclust_flags=()
  if [[ "$preset" == "reduced_dbs" ]]; then
    bfd_flags=("--small_bfd_database_path=$small_bfd_database_path")
  else
    bfd_flags=("--bfd_database_path=$bfd_database_path")
    uniclust_flags=("--uniclust30_database_path=$uniclust30_database_path")
  fi
  alphafold_command=(
    "${python_bin:-./cfold-conda/bin/python3.7}"
    "$alphafold_script"
    "--hhblits_binary_path=$hhblits_binary_path"
    "--hhsearch_binary_path=$hhsearch_binary_path"
    "--jackhmmer_binary_path=$jackhmmer_binary_path"
    "--kalign_binary_path=$kalign_binary_path"
    "${bfd_flags[@]}"
    "--mgnify_database_path=$mgnify_database_path"
    "--template_mmcif_dir=$template_mmcif_dir"
    "--obsolete_pdbs_path=$obsolete_pdbs_path"
    "--pdb70_database_path=$pdb70_database_path"
    "${uniclust_flags[@]}"
    "--uniref90_database_path=$uniref90_database_path"
    "--data_dir=$data_dir"
    "--output_dir=$result_dir"
    "--fasta_paths=$fasta_path"
    "--model_names=$model_names"
    "--max_template_date=$max_template_date"
    "--preset=$preset"
    "--benchmark=$benchmark"
    --logtostderr
  )
}
build_alphafold_command
# Last command of the script: its exit status becomes the script's status.
"${alphafold_command[@]}"