Skip to content

Commit

Permalink
Update pLM-BLAST.
Browse files Browse the repository at this point in the history
  • Loading branch information
vikramalva committed Apr 19, 2024
1 parent 9700bd0 commit 97dfc04
Show file tree
Hide file tree
Showing 10 changed files with 76 additions and 40 deletions.
4 changes: 2 additions & 2 deletions conf/tools.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#Specify hardruntime in seconds

# A new version indicates that the frontend should reload the configuration
version: "1.2.6"
version: "1.2.7"

Tools {

Expand Down Expand Up @@ -1102,7 +1102,7 @@ Tools {
threads: 16
hardruntime: 3600
language: "Python"
parameter: ["ALIGNMENT", "PLMBLASTDB", "COSINE_PERCENTILE_CUTOFF", "ALIGNMENT_CUTOFF", "WIN_LEN", "MERGE_HITS", "DESC", "PLM_ALN_MODE", "SPAN", "SIGMA_FACTOR"]
parameter: ["ALIGNMENT", "PLMBLASTDB", "COSINE_PERCENTILE_CUTOFF", "ALIGNMENT_CUTOFF", "WIN_LEN", "MERGE_HITS", "DESC", "PLM_ALN_MODE", "SPAN", "SIGMA_FACTOR", "BFACTOR","PLM_GAP_EXT"]
result_views: [{
title: "results"
component: "plmblastResults"
Expand Down
43 changes: 27 additions & 16 deletions frontend/src/i18n/lang/extras/modals/toolHelp/plmblast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,25 @@ export default {
},
{
title: 'Cosine similarity percentile cut-off',
content: `A pre-screening procedure is used to improve the performance of the database search.
First, the database of partially flattened embeddings is searched using the partially flattened
query embedding and the cosine similarity metric, and then the actual pLM-BLAST comparisons are
performed only for matches above the user-specified cut-off. The cut-off is expressed as the n-th
percentile of all cosine similarity scores. The higher the pre-screening cut-off, the faster and
less sensitive the search will be, and vice versa.`,
content: `A pre-screening procedure can be used to improve the performance of the database search.
First, the database of flattened embeddings is searched using the flattened query embedding and the
cosine similarity metric, and then the actual pLM-BLAST comparisons are performed only for matches
above the user-specified cut-off. The cut-off is expressed as the n-th percentile of all cosine
similarity scores. The higher the pre-screening cut-off, the faster and less sensitive the search
will be, and vice versa.`,
},
{
title: 'Alignment score cut-off',
content: `Each alignment is assigned a score from 0 to 1. The alignment cut-off defines the
minimum score for reporting a match. The higher the cut-off, the more stringent the search.
Also note that only matches that have passed the pre-filtering step (see "Cosine Similarity
Percentile Cutoff") are considered.
`,
content: `Each query-target alignment is assigned a score from 0 to 1. The alignment cut-off defines
the minimum score for reporting a match. The higher the cut-off, the more stringent the search. Also
note that only matches that have passed the pre-filtering step (see "Cosine similarity percentile
cut-off") are considered.`,
},
{
title: 'Window length',
content: `A moving average is used to detect local alignments within the full paths determined
by the traceback procedure. Increasing the window size results in longer local alignments, but
may decrease sensitivity. This parameter is not used in Global Alignment mode.`,
content: `A moving average is used to detect local alignments within the paths determined by the traceback
procedure. Increasing the window size results in longer local alignments, but may decrease sensitivity.
This parameter is not used in Global Alignment mode.`,
},
{
title: 'Merge hits',
Expand All @@ -58,15 +57,17 @@ export default {
},
{
title: 'Max target hits',
content: `This parameter controls how many matches are displayed in the results.`,
content: `This parameter controls how many matches are displayed in the results. This filter is
applied after the alignment score cutoff filter.`,
},
{
title: 'Alignment mode',
content: `Specifies whether to return local or global alignments.`,
},
{
title: 'Minimal hit span',
content: `Specifies the minimum length of matches returned.`,
content: `Specifies the minimum length of matches returned. The minimum match length cannot be
less than the window length.`,
},
{
title: 'Sigma factor',
Expand All @@ -75,6 +76,16 @@ export default {
the algorithm stricter or more permissive, respectively. This parameter is not used in Global
Alignment mode.`,
},
{
title: 'Bfactor',
content: `Using a Bfactor value greater than 1 will reduce the initial number of alignments by
ignoring alignments that are very close to each other, thus increasing the search speed. This
parameter is not used in Global Alignment mode.`,
},
{
title: 'Gap extension penalty',
content: 'The penalty for extending a gap.',
},

],
references: `<p>pLM-BLAST – distant homology detection based on direct comparison of sequence
Expand Down
2 changes: 2 additions & 0 deletions frontend/src/i18n/lang/modules/tools-en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ export default {
alignment_cutoff: 'Alignment score cut-off',
win_len: 'Window length',
merge_hits: 'Merge hits',
bfactor: "Bfactor",
plm_gap_ext: 'Gap extension penalty',
},
},
inputPlaceholder: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,17 @@ class ParamAccess @Inject() (tel: TEL) {

// pLM-BLAST
case "PLMBLASTDB" => select("plmblastdb", default = Some("ECOD30"))
case "COSINE_PERCENTILE_CUTOFF" => select("cosine_percentile_cutoff", default = Some("95"))
case "COSINE_PERCENTILE_CUTOFF" => select("cosine_percentile_cutoff", default = Some("70"))
case "ALIGNMENT_CUTOFF" => select("alignment_cutoff", default = Some("0.30"))
case "WIN_LEN" => select("win_len", default = Some("10"))
case "WIN_LEN" => select("win_len", default = Some("15"))
case "MERGE_HITS" => select("merge_hits", default = Some("1"))
case "PLM_ALN_MODE" => select("plm_aln_mode", default = Some("False"))
case "PLM_ALN_MODE" => select("plm_aln_mode", default = Some("loc"))
case "SPAN" => select("span", default = Some("25"))
case "SIGMA_FACTOR" => select("sigma_factor", default = Some("2"))
case "BFACTOR" => select("bfactor", default = Some("2"))
case "PLM_GAP_EXT" => select("plm_gap_ext", default = Some("0.5"))



// HHpred
case "TWOTEXTALIGNMENT" =>
Expand Down
2 changes: 2 additions & 0 deletions tel/paramspec/PARAMS
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,5 @@ diamond_min_aln_cov GEN diamond_min_aln_cov.prop
plm_aln_mode GEN plm_aln_mode.prop
span GEN span.prop
sigma_factor GEN sigma_factor.prop
plm_gap_ext GEN plm_gap_ext.prop
bfactor GEN bfactor.prop
3 changes: 3 additions & 0 deletions tel/paramspec/bfactor.prop
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
1 1
2 2
5 5
3 changes: 2 additions & 1 deletion tel/paramspec/cosine_percentile_cutoff.prop
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
70 70
80 80
85 85
90 90
95 95
96 96
97 97
98 98
99 99
99 99
4 changes: 2 additions & 2 deletions tel/paramspec/plm_aln_mode.prop
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
True Global
False Local
glob Global
loc Local
3 changes: 3 additions & 0 deletions tel/paramspec/plm_gap_ext.prop
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
0 0
0.5 0.5
1 1
42 changes: 26 additions & 16 deletions tel/runscripts/plmblast.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,23 @@ if [[ ${SEQ_COUNT} = "0" ]] ; then
fi
fi
source ${BIOPROGS}/dependencies/anaconda3/etc/profile.d/conda.sh
conda activate plmblast
conda activate plm_blast

set -e
export HF_HOME=$PLMBLASTPATH/cache

echo "#Calculating embedding for query sequence." >> ../results/process.log
# calculate index
python3.9 $PLMBLASTPATH/scripts/makeindex.py ../results/${JOBID}.fas ../results/${JOBID}.csv
#python $PLMBLASTPATH/scripts/makeindex.py ../results/${JOBID}.fas ../results/${JOBID}.csv

# calculate query embedding
python3.9 $PLMBLASTPATH/embeddings.py \
python3.10 $PLMBLASTPATH/embeddings.py start\
../results/${JOBID}.fas \
../results/${JOBID}.pt
echo "done" >> ../results/process.log

set -e
export MKL_NUM_THREADS=1
export NUMEXPR_NUM_THREADS=1
export OMP_NUM_THREADS=1



echo "#Searching %plmblastdb.content." >> ../results/process.log

Expand All @@ -55,32 +56,41 @@ else
adjusted_span="%span.content"
fi

python3.9 $PLMBLASTPATH/scripts/run_plm_blast.py %PLMBLAST/%plmblastdb.content \
# Translate the selected alignment mode into the optional command-line flag
# passed to plmblast.py below. "glob" selects global alignment; any other
# value (e.g. "loc") leaves the flag empty.
# A string comparison is required here: inside [[ ]] the -eq operator
# evaluates both operands arithmetically, so words such as "glob" and "loc"
# are not compared as text and the two modes cannot be told apart.
if [[ "%plm_aln_mode.content" == "glob" ]]
then
    aln_mode="--global_aln"
else
    aln_mode=""
fi

python3.10 $PLMBLASTPATH/scripts/plmblast.py %PLMBLAST/%plmblastdb.content \
../results/${JOBID} \
../results/${JOBID}.hits.csv \
-cosine_percentile_cutoff %cosine_percentile_cutoff.content \
-cpc %cosine_percentile_cutoff.content \
-alignment_cutoff %alignment_cutoff.content \
-max_targets %desc.content \
-workers %THREADS \
-sigma_factor %sigma_factor.content \
-use_chunks \
-win %win_len.content \
-span ${adjusted_span} \
--global_aln %plm_aln_mode.content
-gap_ext %plm_gap_ext.content \
-bfactor %bfactor.content \
${aln_mode}

echo "done" >> ../results/process.log

#-max_targets %desc.content \

echo "#Preparing output." >> ../results/process.log

if [[ %merge_hits.content = "1" ]] ; then
# pLM-BLAST tends to yield rather short hits therefore it is beneficial to merge those associated
# with a single database sequence; additionally, a more strict score cut-off is used
python3.9 $PLMBLASTPATH/scripts/merge.py ../results/${JOBID}.hits.csv \
../results/${JOBID}.hits_merged.csv
python3.10 $PLMBLASTPATH/scripts/merge.py ../results/${JOBID}.hits.csv \
../results/${JOBID}.hits_merged.csv -max_hits %desc.content
mv ../results/${JOBID}.hits_merged.csv ../results/${JOBID}.hits.csv
fi

python3.9 $PLMBLASTPATH/scripts/csv2nice.py ../results/${JOBID}.hits.csv > ../results/${JOBID}.hits.txt
python3.10 $PLMBLASTPATH/scripts/csv2nice.py ../results/${JOBID}.hits.csv > ../results/${JOBID}.hits.txt

plmblast_csv_to_json.py ../results/${JOBID}.hits.csv ../results/results.json

Expand All @@ -93,4 +103,4 @@ plmblastviz.pl ${JOBID} ../results/ ../results/
sed 's/[\.\-]//g' ../results/${JOBID}.fas > ../results/query.fas
fasta2json.py ../results/query.fas ../results/query.json

echo "done" >> ../results/process.log
echo "done" >> ../results/process.log

0 comments on commit 97dfc04

Please sign in to comment.