-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added structured tests for the command-line tools in metapredict.
For now only metapredict-predict-idrs has tests, but the plan is to add additional tests going forward.
- Loading branch information
1 parent
d7483d5
commit a3f58d6
Showing
4 changed files
with
243 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import subprocess | ||
import pytest | ||
import os | ||
|
||
## Init file for the CLI tests in metapredict | ||
## | ||
## | ||
## | ||
|
||
def run_command(cmd, outfile=None):
    """
    Run a command-line tool through the shell and return its result.

    This is a catch-all helper for the CLI tests that

    1. Deletes a putative output file (if ``outfile`` is given), so that a
       stale file from an earlier run cannot be mistaken for fresh output.
    2. Runs ``cmd`` and captures whatever it writes to stdout and stderr.

    Parameters
    ----------
    cmd : str
        The full command line. It is executed with ``shell=True``, so it is
        interpreted by the shell exactly as written.

    outfile : str, optional
        Path of the file the command is expected to write. When given, the
        file is removed before the command runs. A file that does not exist
        is not an error.

    Returns
    -------
    subprocess.CompletedProcess
        The object returned by ``subprocess.run``, which has the following
        attributes:

        ``.stdout``     (standard output, as text)
        ``.stderr``     (standard error, as text)
        ``.returncode`` (command return code; 0 = no error)

    Raises
    ------
    Exception
        If ``outfile`` still exists as a regular file after the attempted
        deletion. The underlying ``OSError`` (if any) is chained as the cause
        so the reason the deletion failed is visible in the traceback.
    """

    # if an output file was passed, try to delete it and then check that it
    # was actually deleted
    if outfile:
        removal_error = None
        try:
            os.remove(outfile)
        except OSError as err:
            # best effort: a missing file is the normal case; remember the
            # error so it can be reported if the file turns out to still exist
            removal_error = err

        # check the file is missing. This raises an exception if the file
        # did not get removed
        if os.path.isfile(outfile):
            raise Exception(
                'When preparing to run the command, the output file was not deleted'
            ) from removal_error

    # run the command using subprocess, capturing stdout/stderr as text
    return subprocess.run(cmd, shell=True, capture_output=True, text=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
>Q8N6T3 | ||
MASPRTRKVLKEVRVQDENNVCFECGAFNPQWVSVTYGIWICLECSGRHRGLGVHLSFVRSVTMDKWKDIELEKMKAGGNAKFREFLESQEDYDPCWSLQEKYNSRAAALFRDKVVALAEGREWSLESSPAQNWTPPQPRTLPSMVHRVSGQPQSVTASSDKAFEDWLNDDLGSYQGAQGNRYVGFGNTPPPQKKEDDFLNNAMSSLYSGWSSFTTGASRFASAAKEGATKFGSQASQKASELGHSLNENVLKPAQEKVKEGKIFDDVSSGVSQLASKVQGVGSKGWRDVTTFFSGKAEGPLDSPSEGHSYQNSGLDHFQNSNIDQSFWETFGSAEPTKTRKSPSSDSWTCADTSTERRSSDSWEVWGSASTNRNSNSDGGEGGEGTKKAVPPAVPTDDGWDNQNW | ||
|
||
>p53 | ||
MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGP | ||
DEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAK | ||
SVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHE | ||
RCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNS | ||
SCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELP | ||
PGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG | ||
GSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD | ||
|
||
>sp|P0DMV8|HS71A_HUMAN Heat shock 70 kDa protein 1A OS=Homo sapiens OX=9606 GN=HSPA1A PE=1 SV=1 | ||
MAKAAAIGIDLGTTYSCVGVFQHGKVEIIANDQGNRTTPSYVAFTDTERLIGDAAKNQVA | ||
LNPQNTVFDAKRLIGRKFGDPVVQSDMKHWPFQVINDGDKPKVQVSYKGETKAFYPEEIS | ||
SMVLTKMKEIAEAYLGYPVTNAVITVPAYFNDSQRQATKDAGVIAGLNVLRIINEPTAAA | ||
IAYGLDRTGKGERNVLIFDLGGGTFDVSILTIDDGIFEVKATAGDTHLGGEDFDNRLVNH | ||
FVEEFKRKHKKDISQNKRAVRRLRTACERAKRTLSSSTQASLEIDSLFEGIDFYTSITRA | ||
RFEELCSDLFRSTLEPVEKALRDAKLDKAQIHDLVLVGGSTRIPKVQKLLQDFFNGRDLN | ||
KSINPDEAVAYGAAVQAAILMGDKSENVQDLLLLDVAPLSLGLETAGGVMTALIKRNSTI | ||
PTKQTQIFTTYSDNQPGVLIQVYEGERAMTKDNNLLGRFELSGIPPAPRGVPQIEVTFDI | ||
DANGILNVTATDKSTGKANKITITNDKGRLSKEEIERMVQEAEKYKAEDEVQRERVSAKN | ||
ALESYAFNMKSAVEDEGLKGKISEADKKKVLDKCQEVISWLDANTLAEKDEFEHKRKELE | ||
QVCNPIISGLYQGAGGPGPGGFGAQGPKGGSGSGPTIEEVD |
175 changes: 175 additions & 0 deletions
175
metapredict/tests/test_cli/test_metapredict-predict-idrs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
import subprocess | ||
import pytest | ||
import protfasta | ||
import os | ||
|
||
from . import run_command | ||
|
||
|
||
|
||
def test_metapredict_predict_IDRs_1():
    """
    Basic test for the simplest version of metapredict-predict-idrs: an
    input FASTA file and an output file, with no other options.

    The input and output paths are relative, so the test must be run from
    the directory that contains ``input/`` and ``output/``.
    """

    # expected FASTA records (header -> IDR sequence) for the default settings
    precomputed_idrs = {'Q8N6T3 IDR_START=123 IDR_END=406': 'WSLESSPAQNWTPPQPRTLPSMVHRVSGQPQSVTASSDKAFEDWLNDDLGSYQGAQGNRYVGFGNTPPPQKKEDDFLNNAMSSLYSGWSSFTTGASRFASAAKEGATKFGSQASQKASELGHSLNENVLKPAQEKVKEGKIFDDVSSGVSQLASKVQGVGSKGWRDVTTFFSGKAEGPLDSPSEGHSYQNSGLDHFQNSNIDQSFWETFGSAEPTKTRKSPSSDSWTCADTSTERRSSDSWEVWGSASTNRNSNSDGGEGGEGTKKAVPPAVPTDDGWDNQNW',
                        'p53 IDR_START=0 IDR_END=101': 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQK',
                        'p53 IDR_START=280 IDR_END=393': 'DRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD',
                        'sp|P0DMV8|HS71A_HUMAN Heat shock 70 kDa protein 1A OS=Homo sapiens OX=9606 GN=HSPA1A PE=1 SV=1 IDR_START=607 IDR_END=641': 'SGLYQGAGGPGPGGFGAQGPKGGSGSGPTIEEVD'}

    # run_command removes the output file first if it is already there
    cmd = 'metapredict-predict-idrs input/three_seqs.fasta -o output/test_1.fasta'
    outfile = 'output/test_1.fasta'
    result = run_command(cmd, outfile)

    ## check that there was no error
    assert result.returncode == 0, result.stderr

    ## check nothing was printed to screen. Note `"" in result.stdout` would
    # be true for ANY string, so compare against the empty string instead
    assert result.stdout.strip() == "", result.stdout

    ## Now we check the command actually worked. Read in the putative
    # outfile generated by the command
    D = protfasta.read_fasta(outfile)

    ## assert this matches precomputed predictions. Comparing the whole
    # dictionaries also catches an empty output file or missing IDRs, which
    # a loop over the records that were read would silently accept
    assert D == precomputed_idrs
|
||
|
||
|
||
def test_metapredict_predict_IDRs_2():
    """
    Test that metapredict-predict-idrs accepts a non-default disorder
    threshold (``--threshold 0.9``) and writes the expected IDRs.

    The input and output paths are relative, so the test must be run from
    the directory that contains ``input/`` and ``output/``.
    """

    # check we can compute for different thresholds; these are the expected
    # FASTA records (header -> IDR sequence) for --threshold 0.9
    precomputed_idrs = {'Q8N6T3 IDR_START=141 IDR_END=406': 'LPSMVHRVSGQPQSVTASSDKAFEDWLNDDLGSYQGAQGNRYVGFGNTPPPQKKEDDFLNNAMSSLYSGWSSFTTGASRFASAAKEGATKFGSQASQKASELGHSLNENVLKPAQEKVKEGKIFDDVSSGVSQLASKVQGVGSKGWRDVTTFFSGKAEGPLDSPSEGHSYQNSGLDHFQNSNIDQSFWETFGSAEPTKTRKSPSSDSWTCADTSTERRSSDSWEVWGSASTNRNSNSDGGEGGEGTKKAVPPAVPTDDGWDNQNW', 'p53 IDR_START=64 IDR_END=89': 'RMPEAAPPVAPAPAAPTPAAPAPAP', 'p53 IDR_START=295 IDR_END=318': 'HHELPPGSTKRALPNNTSSSPQP', 'p53 IDR_START=358 IDR_END=393': 'PGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD', 'sp|P0DMV8|HS71A_HUMAN Heat shock 70 kDa protein 1A OS=Homo sapiens OX=9606 GN=HSPA1A PE=1 SV=1 IDR_START=615 IDR_END=641': 'GPGPGGFGAQGPKGGSGSGPTIEEVD'}

    # run_command removes the output file first if it is already there
    cmd = 'metapredict-predict-idrs input/three_seqs.fasta -o output/test_1.fasta --threshold 0.9'
    outfile = 'output/test_1.fasta'
    result = run_command(cmd, outfile)

    ## check that there was no error
    assert result.returncode == 0, result.stderr

    ## check nothing was printed to screen. Note `"" in result.stdout` would
    # be true for ANY string, so compare against the empty string instead
    assert result.stdout.strip() == "", result.stdout

    ## Now we check the command actually worked. Read in the putative
    # outfile generated by the command
    D = protfasta.read_fasta(outfile)

    ## assert this matches precomputed predictions. Comparing the whole
    # dictionaries also catches an empty output file or missing IDRs, which
    # a loop over the records that were read would silently accept
    assert D == precomputed_idrs
|
||
def test_metapredict_predict_IDRs_legacy():
    """
    Test that metapredict-predict-idrs accepts the ``--legacy`` flag and
    writes the expected IDRs.

    The input and output paths are relative, so the test must be run from
    the directory that contains ``input/`` and ``output/``.
    """

    # expected FASTA records (header -> IDR sequence) when --legacy is passed
    precomputed_idrs = {'Q8N6T3 IDR_START=124 IDR_END=406': 'SLESSPAQNWTPPQPRTLPSMVHRVSGQPQSVTASSDKAFEDWLNDDLGSYQGAQGNRYVGFGNTPPPQKKEDDFLNNAMSSLYSGWSSFTTGASRFASAAKEGATKFGSQASQKASELGHSLNENVLKPAQEKVKEGKIFDDVSSGVSQLASKVQGVGSKGWRDVTTFFSGKAEGPLDSPSEGHSYQNSGLDHFQNSNIDQSFWETFGSAEPTKTRKSPSSDSWTCADTSTERRSSDSWEVWGSASTNRNSNSDGGEGGEGTKKAVPPAVPTDDGWDNQNW', 'p53 IDR_START=0 IDR_END=102': 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKT', 'p53 IDR_START=277 IDR_END=393': 'PGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD', 'sp|P0DMV8|HS71A_HUMAN Heat shock 70 kDa protein 1A OS=Homo sapiens OX=9606 GN=HSPA1A PE=1 SV=1 IDR_START=610 IDR_END=641': 'YQGAGGPGPGGFGAQGPKGGSGSGPTIEEVD'}

    # run_command removes the output file first if it is already there
    cmd = 'metapredict-predict-idrs input/three_seqs.fasta -o output/test_1.fasta --legacy'
    outfile = 'output/test_1.fasta'
    result = run_command(cmd, outfile)

    ## check that there was no error
    assert result.returncode == 0, result.stderr

    ## check nothing was printed to screen. Note `"" in result.stdout` would
    # be true for ANY string, so compare against the empty string instead
    assert result.stdout.strip() == "", result.stdout

    ## Now we check the command actually worked. Read in the putative
    # outfile generated by the command
    D = protfasta.read_fasta(outfile)

    ## assert this matches precomputed predictions. Comparing the whole
    # dictionaries also catches an empty output file or missing IDRs, which
    # a loop over the records that were read would silently accept
    assert D == precomputed_idrs
|
||
|
||
def test_metapredict_predict_IDRs_test_invalid_sequence_actions():
    """
    Not super sophisticated, but checks that the different invalid sequence
    actions purported to work can at least be passed without causing an
    issue, and that an unknown action is rejected.

    The test expects identical output for every accepted action, and a
    return code of 1 for an action name that does not exist.

    The input and output paths are relative, so the test must be run from
    the directory that contains ``input/`` and ``output/``.
    """

    outfile = 'output/test_1.fasta'
    base_cmd = 'metapredict-predict-idrs input/three_seqs.fasta -o output/test_1.fasta --invalid-sequence-action '

    # expected FASTA records (header -> IDR sequence), the same for every
    # accepted action
    precomputed_idrs = {'Q8N6T3 IDR_START=123 IDR_END=406': 'WSLESSPAQNWTPPQPRTLPSMVHRVSGQPQSVTASSDKAFEDWLNDDLGSYQGAQGNRYVGFGNTPPPQKKEDDFLNNAMSSLYSGWSSFTTGASRFASAAKEGATKFGSQASQKASELGHSLNENVLKPAQEKVKEGKIFDDVSSGVSQLASKVQGVGSKGWRDVTTFFSGKAEGPLDSPSEGHSYQNSGLDHFQNSNIDQSFWETFGSAEPTKTRKSPSSDSWTCADTSTERRSSDSWEVWGSASTNRNSNSDGGEGGEGTKKAVPPAVPTDDGWDNQNW',
                        'p53 IDR_START=0 IDR_END=101': 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQK',
                        'p53 IDR_START=280 IDR_END=393': 'DRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD',
                        'sp|P0DMV8|HS71A_HUMAN Heat shock 70 kDa protein 1A OS=Homo sapiens OX=9606 GN=HSPA1A PE=1 SV=1 IDR_START=607 IDR_END=641': 'SGLYQGAGGPGPGGFGAQGPKGGSGSGPTIEEVD'}

    # every accepted action must run cleanly and give the expected output
    for action in ('ignore', 'fail', 'remove', 'convert-ignore', 'convert-remove'):
        # run_command deletes the output of the previous iteration first,
        # so each action is checked against a freshly written file
        result = run_command(base_cmd + action, outfile)
        assert result.returncode == 0, f'{action}: {result.stderr}'

        ## assert this matches precomputed predictions. Comparing the whole
        # dictionaries also catches an empty output file or missing IDRs
        D = protfasta.read_fasta(outfile)
        assert D == precomputed_idrs, action

    result = run_command(base_cmd + 'FAKE-ACTION', outfile)

    # expect this to be 1 because error is raised passing a bad action
    assert result.returncode == 1
|
||
|