Skip to content

Commit

Permalink
Added structured tests for the command-line tools in metapredict. F…
Browse files Browse the repository at this point in the history
…or now only metapredict-predict-idrs has tests, but the plan is to add additional tests going forward
  • Loading branch information
alexholehouse committed May 14, 2023
1 parent d7483d5 commit a3f58d6
Show file tree
Hide file tree
Showing 4 changed files with 243 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ __pycache__/
.DS_Store
*~

metapredict/tests/test_cli/output/*

# cython-specific stuff
metapredict/backend/cython/domain_definition.c
metapredict/backend/cython/domain_definition.html
Expand Down
42 changes: 42 additions & 0 deletions metapredict/tests/test_cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import subprocess
import pytest
import os

## Init file for the CLI tests in metapredict
##
##
##

def run_command(cmd, outfile=None):
    """
    Run a command-line tool through the shell and return the finished process.

    When ``outfile`` is given, the function first

    1. deletes any existing file at that path, so a stale file left behind by
       an earlier run cannot be mistaken for output written by ``cmd``, and
    2. makes sure the directory that should hold ``outfile`` exists,

    and only then runs ``cmd``.

    Parameters
    ----------
    cmd : str
        Command line to execute. It is handed to the shell unmodified
        (``shell=True``), so it must only ever be built from trusted input.

    outfile : str, optional
        Path of the file the command is expected to write. Default is None,
        in which case no file-system preparation is done.

    Returns
    -------
    subprocess.CompletedProcess
        The result of ``subprocess.run``. The attributes of interest are:

        * ``.stdout``     - standard output, as text
        * ``.stderr``     - standard error, as text
        * ``.returncode`` - command return code (0 = no error)

    Raises
    ------
    Exception
        If ``outfile`` still exists as a regular file after the attempted
        delete.
    """

    if outfile:
        try:
            os.remove(outfile)
        except OSError:
            # A missing file is the expected case. Any other failure to delete
            # is caught by the explicit existence check just below.
            pass

        # check the file is really gone before running the command
        if os.path.isfile(outfile):
            raise Exception('When preparing to run the command, the output file was not deleted')

        # The output directory may be absent (e.g. when its contents are
        # git-ignored), so create it rather than rely on the tool to do so.
        parent_dir = os.path.dirname(outfile)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    # run the command using subprocess, capturing stdout/stderr as text
    return subprocess.run(cmd, shell=True, capture_output=True, text=True)
24 changes: 24 additions & 0 deletions metapredict/tests/test_cli/input/three_seqs.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
>Q8N6T3
MASPRTRKVLKEVRVQDENNVCFECGAFNPQWVSVTYGIWICLECSGRHRGLGVHLSFVRSVTMDKWKDIELEKMKAGGNAKFREFLESQEDYDPCWSLQEKYNSRAAALFRDKVVALAEGREWSLESSPAQNWTPPQPRTLPSMVHRVSGQPQSVTASSDKAFEDWLNDDLGSYQGAQGNRYVGFGNTPPPQKKEDDFLNNAMSSLYSGWSSFTTGASRFASAAKEGATKFGSQASQKASELGHSLNENVLKPAQEKVKEGKIFDDVSSGVSQLASKVQGVGSKGWRDVTTFFSGKAEGPLDSPSEGHSYQNSGLDHFQNSNIDQSFWETFGSAEPTKTRKSPSSDSWTCADTSTERRSSDSWEVWGSASTNRNSNSDGGEGGEGTKKAVPPAVPTDDGWDNQNW

>p53
MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGP
DEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAK
SVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHE
RCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNS
SCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELP
PGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG
GSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD

>sp|P0DMV8|HS71A_HUMAN Heat shock 70 kDa protein 1A OS=Homo sapiens OX=9606 GN=HSPA1A PE=1 SV=1
MAKAAAIGIDLGTTYSCVGVFQHGKVEIIANDQGNRTTPSYVAFTDTERLIGDAAKNQVA
LNPQNTVFDAKRLIGRKFGDPVVQSDMKHWPFQVINDGDKPKVQVSYKGETKAFYPEEIS
SMVLTKMKEIAEAYLGYPVTNAVITVPAYFNDSQRQATKDAGVIAGLNVLRIINEPTAAA
IAYGLDRTGKGERNVLIFDLGGGTFDVSILTIDDGIFEVKATAGDTHLGGEDFDNRLVNH
FVEEFKRKHKKDISQNKRAVRRLRTACERAKRTLSSSTQASLEIDSLFEGIDFYTSITRA
RFEELCSDLFRSTLEPVEKALRDAKLDKAQIHDLVLVGGSTRIPKVQKLLQDFFNGRDLN
KSINPDEAVAYGAAVQAAILMGDKSENVQDLLLLDVAPLSLGLETAGGVMTALIKRNSTI
PTKQTQIFTTYSDNQPGVLIQVYEGERAMTKDNNLLGRFELSGIPPAPRGVPQIEVTFDI
DANGILNVTATDKSTGKANKITITNDKGRLSKEEIERMVQEAEKYKAEDEVQRERVSAKN
ALESYAFNMKSAVEDEGLKGKISEADKKKVLDKCQEVISWLDANTLAEKDEFEHKRKELE
QVCNPIISGLYQGAGGPGPGGFGAQGPKGGSGSGPTIEEVD
175 changes: 175 additions & 0 deletions metapredict/tests/test_cli/test_metapredict-predict-idrs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
import subprocess
import pytest
import protfasta
import os

from . import run_command



def test_metapredict_predict_IDRs_1():
    """
    Basic test for the simplest invocation of metapredict-predict-idrs.

    Runs the command on input/three_seqs.fasta with no optional flags other
    than the output path, then compares the FASTA file it writes against
    precomputed IDRs. The paths are relative, so the test expects the working
    directory to be the one containing input/ and output/.
    """

    precomputed_idrs = {
        'Q8N6T3 IDR_START=123 IDR_END=406': 'WSLESSPAQNWTPPQPRTLPSMVHRVSGQPQSVTASSDKAFEDWLNDDLGSYQGAQGNRYVGFGNTPPPQKKEDDFLNNAMSSLYSGWSSFTTGASRFASAAKEGATKFGSQASQKASELGHSLNENVLKPAQEKVKEGKIFDDVSSGVSQLASKVQGVGSKGWRDVTTFFSGKAEGPLDSPSEGHSYQNSGLDHFQNSNIDQSFWETFGSAEPTKTRKSPSSDSWTCADTSTERRSSDSWEVWGSASTNRNSNSDGGEGGEGTKKAVPPAVPTDDGWDNQNW',
        'p53 IDR_START=0 IDR_END=101': 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQK',
        'p53 IDR_START=280 IDR_END=393': 'DRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD',
        'sp|P0DMV8|HS71A_HUMAN Heat shock 70 kDa protein 1A OS=Homo sapiens OX=9606 GN=HSPA1A PE=1 SV=1 IDR_START=607 IDR_END=641': 'SGLYQGAGGPGPGGFGAQGPKGGSGSGPTIEEVD',
    }

    # run_command removes any existing output file before running the command
    outfile = 'output/test_1.fasta'
    cmd = 'metapredict-predict-idrs input/three_seqs.fasta -o ' + outfile
    result = run_command(cmd, outfile)

    ## check that there was no error (show stderr if there was)
    assert result.returncode == 0, result.stderr

    ## intended to check nothing was printed to screen
    # NOTE(review): `"" in <str>` is true for every string, so this assertion
    # can never fail. Tighten to `result.stdout == ""` once it is confirmed
    # that the tool prints nothing on success.
    assert "" in result.stdout

    ## Now we check the command actually worked. Read in the putative
    # outfile generated by the command
    D = protfasta.read_fasta(outfile)

    ## the set of IDR headers must match exactly; without this an empty or
    # truncated output file would pass the per-entry loop below vacuously
    assert set(D) == set(precomputed_idrs)

    ## assert each sequence matches the precomputed prediction
    for header in D:
        assert precomputed_idrs[header] == D[header]



def test_metapredict_predict_IDRs_2():
    """
    Test metapredict-predict-idrs with a non-default ``--threshold`` (0.9).

    Runs the command on input/three_seqs.fasta and compares the FASTA file it
    writes against IDRs precomputed for that threshold. The paths are
    relative, so the test expects the working directory to be the one
    containing input/ and output/.
    """

    # check we can compute for different thresholds
    precomputed_idrs = {
        'Q8N6T3 IDR_START=141 IDR_END=406': 'LPSMVHRVSGQPQSVTASSDKAFEDWLNDDLGSYQGAQGNRYVGFGNTPPPQKKEDDFLNNAMSSLYSGWSSFTTGASRFASAAKEGATKFGSQASQKASELGHSLNENVLKPAQEKVKEGKIFDDVSSGVSQLASKVQGVGSKGWRDVTTFFSGKAEGPLDSPSEGHSYQNSGLDHFQNSNIDQSFWETFGSAEPTKTRKSPSSDSWTCADTSTERRSSDSWEVWGSASTNRNSNSDGGEGGEGTKKAVPPAVPTDDGWDNQNW',
        'p53 IDR_START=64 IDR_END=89': 'RMPEAAPPVAPAPAAPTPAAPAPAP',
        'p53 IDR_START=295 IDR_END=318': 'HHELPPGSTKRALPNNTSSSPQP',
        'p53 IDR_START=358 IDR_END=393': 'PGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD',
        'sp|P0DMV8|HS71A_HUMAN Heat shock 70 kDa protein 1A OS=Homo sapiens OX=9606 GN=HSPA1A PE=1 SV=1 IDR_START=615 IDR_END=641': 'GPGPGGFGAQGPKGGSGSGPTIEEVD',
    }

    # run_command removes any existing output file before running the command
    outfile = 'output/test_1.fasta'
    cmd = 'metapredict-predict-idrs input/three_seqs.fasta -o ' + outfile + ' --threshold 0.9'
    result = run_command(cmd, outfile)

    ## check that there was no error (show stderr if there was)
    assert result.returncode == 0, result.stderr

    ## intended to check nothing was printed to screen
    # NOTE(review): `"" in <str>` is true for every string, so this assertion
    # can never fail. Tighten to `result.stdout == ""` once it is confirmed
    # that the tool prints nothing on success.
    assert "" in result.stdout

    ## Now we check the command actually worked. Read in the putative
    # outfile generated by the command
    D = protfasta.read_fasta(outfile)

    ## the set of IDR headers must match exactly; without this an empty or
    # truncated output file would pass the per-entry loop below vacuously
    assert set(D) == set(precomputed_idrs)

    ## assert each sequence matches the precomputed prediction
    for header in D:
        assert precomputed_idrs[header] == D[header]

def test_metapredict_predict_IDRs_legacy():
    """
    Test metapredict-predict-idrs with the ``--legacy`` flag.

    Runs the command on input/three_seqs.fasta and compares the FASTA file it
    writes against IDRs precomputed with that flag set. The paths are
    relative, so the test expects the working directory to be the one
    containing input/ and output/.
    """

    # expected IDRs when the --legacy flag is passed
    precomputed_idrs = {
        'Q8N6T3 IDR_START=124 IDR_END=406': 'SLESSPAQNWTPPQPRTLPSMVHRVSGQPQSVTASSDKAFEDWLNDDLGSYQGAQGNRYVGFGNTPPPQKKEDDFLNNAMSSLYSGWSSFTTGASRFASAAKEGATKFGSQASQKASELGHSLNENVLKPAQEKVKEGKIFDDVSSGVSQLASKVQGVGSKGWRDVTTFFSGKAEGPLDSPSEGHSYQNSGLDHFQNSNIDQSFWETFGSAEPTKTRKSPSSDSWTCADTSTERRSSDSWEVWGSASTNRNSNSDGGEGGEGTKKAVPPAVPTDDGWDNQNW',
        'p53 IDR_START=0 IDR_END=102': 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKT',
        'p53 IDR_START=277 IDR_END=393': 'PGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD',
        'sp|P0DMV8|HS71A_HUMAN Heat shock 70 kDa protein 1A OS=Homo sapiens OX=9606 GN=HSPA1A PE=1 SV=1 IDR_START=610 IDR_END=641': 'YQGAGGPGPGGFGAQGPKGGSGSGPTIEEVD',
    }

    # run_command removes any existing output file before running the command
    outfile = 'output/test_1.fasta'
    cmd = 'metapredict-predict-idrs input/three_seqs.fasta -o ' + outfile + ' --legacy'
    result = run_command(cmd, outfile)

    ## check that there was no error (show stderr if there was)
    assert result.returncode == 0, result.stderr

    ## intended to check nothing was printed to screen
    # NOTE(review): `"" in <str>` is true for every string, so this assertion
    # can never fail. Tighten to `result.stdout == ""` once it is confirmed
    # that the tool prints nothing on success.
    assert "" in result.stdout

    ## Now we check the command actually worked. Read in the putative
    # outfile generated by the command
    D = protfasta.read_fasta(outfile)

    ## the set of IDR headers must match exactly; without this an empty or
    # truncated output file would pass the per-entry loop below vacuously
    assert set(D) == set(precomputed_idrs)

    ## assert each sequence matches the precomputed prediction
    for header in D:
        assert precomputed_idrs[header] == D[header]


def test_metapredict_predict_IDRs_test_invalid_sequence_actions():
    """
    Test the ``--invalid-sequence-action`` option of metapredict-predict-idrs.

    Not super sophisticated, but checks that the different invalid sequence
    actions purported to work can at least be passed without causing an
    issue, and that an unrecognised action makes the command fail. The paths
    are relative, so the test expects the working directory to be the one
    containing input/ and output/.
    """

    precomputed_idrs = {
        'Q8N6T3 IDR_START=123 IDR_END=406': 'WSLESSPAQNWTPPQPRTLPSMVHRVSGQPQSVTASSDKAFEDWLNDDLGSYQGAQGNRYVGFGNTPPPQKKEDDFLNNAMSSLYSGWSSFTTGASRFASAAKEGATKFGSQASQKASELGHSLNENVLKPAQEKVKEGKIFDDVSSGVSQLASKVQGVGSKGWRDVTTFFSGKAEGPLDSPSEGHSYQNSGLDHFQNSNIDQSFWETFGSAEPTKTRKSPSSDSWTCADTSTERRSSDSWEVWGSASTNRNSNSDGGEGGEGTKKAVPPAVPTDDGWDNQNW',
        'p53 IDR_START=0 IDR_END=101': 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQK',
        'p53 IDR_START=280 IDR_END=393': 'DRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD',
        'sp|P0DMV8|HS71A_HUMAN Heat shock 70 kDa protein 1A OS=Homo sapiens OX=9606 GN=HSPA1A PE=1 SV=1 IDR_START=607 IDR_END=641': 'SGLYQGAGGPGPGGFGAQGPKGGSGSGPTIEEVD',
    }

    outfile = 'output/test_1.fasta'
    base_cmd = 'metapredict-predict-idrs input/three_seqs.fasta -o ' + outfile + ' --invalid-sequence-action '

    def check_results():
        # read the file the command just wrote and compare with expectations
        D = protfasta.read_fasta(outfile)

        ## the set of IDR headers must match exactly; without this an empty
        # or truncated output file would pass the loop below vacuously
        assert set(D) == set(precomputed_idrs)

        ## assert each sequence matches the precomputed prediction
        for header in D:
            assert precomputed_idrs[header] == D[header]

    # every action the tool is expected to accept should succeed and give
    # the same IDRs for this input
    for action in ('ignore', 'fail', 'remove', 'convert-ignore', 'convert-remove'):
        result = run_command(base_cmd + action, outfile)
        assert result.returncode == 0, 'action %r failed: %s' % (action, result.stderr)
        check_results()

    result = run_command(base_cmd + 'FAKE-ACTION', outfile)

    # expect this to be 1 because error is raised passing a bad action
    assert result.returncode == 1


0 comments on commit a3f58d6

Please sign in to comment.