Skip to content

Commit aef8733

Browse files
authored
Merge pull request #87 from MaozGelbart/motifpssm_fixes
FIX: MotifPssmPattern load from file now loads correctly
2 parents f200677 + 70eea9d commit aef8733

File tree

4 files changed

+85
-3
lines changed

4 files changed

+85
-3
lines changed

dnachisel/SequencePattern/MotifPssmPattern.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def list_from_file(
150150
sequence(s) with the absolute highest possible score".
151151
"""
152152
if isinstance(motifs_file, str):
153-
with open("./jaspar.txt", "r") as f:
153+
with open(motifs_file, "r") as f:
154154
motifs_list = motifs.parse(f, file_format)
155155
else:
156156
motifs_list = motifs.parse(motifs_file, file_format)

tests/data/multiple_motifs.meme.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
MEME version 4
2+
3+
ALPHABET= ACGT
4+
5+
strands: + -
6+
7+
Background letter frequencies
8+
A 0.25 C 0.25 G 0.25 T 0.25
9+
10+
MOTIF MA0016.1 MA0016.1.usp
11+
letter-probability matrix: alength= 4 w= 10 nsites= 38 E= 0
12+
0.000000 0.026316 0.973684 0.000000
13+
0.026316 0.000000 0.947368 0.026316
14+
0.000000 0.000000 1.000000 0.000000
15+
0.000000 0.000000 1.000000 0.000000
16+
0.000000 0.000000 0.000000 1.000000
17+
0.000000 0.947368 0.026316 0.026316
18+
0.921053 0.000000 0.078947 0.000000
19+
0.131579 0.657895 0.078947 0.131579
20+
0.131579 0.210526 0.578947 0.078947
21+
0.157895 0.263158 0.421053 0.157895
22+
URL http://jaspar.genereg.net/matrix/MA0016.1
23+
24+
MOTIF MA0011.2 MA0011.2.br
25+
letter-probability matrix: alength= 4 w= 6 nsites= 12 E= 0
26+
0.000000 0.833333 0.000000 0.166667
27+
0.000000 0.083333 0.000000 0.916667
28+
1.000000 0.000000 0.000000 0.000000
29+
0.083333 0.083333 0.166667 0.666667
30+
0.166667 0.000000 0.083333 0.750000
31+
0.083333 0.166667 0.083333 0.666667
32+
URL http://jaspar.genereg.net/matrix/MA0011.2
33+

tests/data/single_motif.meme.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
MEME version 4
2+
3+
ALPHABET= ACGT
4+
5+
strands: + -
6+
7+
Background letter frequencies
8+
A 0.25 C 0.25 G 0.25 T 0.25
9+
10+
MOTIF MA0011.2 MA0011.2.br
11+
letter-probability matrix: alength= 4 w= 6 nsites= 12 E= 0
12+
0.000000 0.833333 0.000000 0.166667
13+
0.000000 0.083333 0.000000 0.916667
14+
1.000000 0.000000 0.000000 0.000000
15+
0.083333 0.083333 0.166667 0.666667
16+
0.166667 0.000000 0.083333 0.750000
17+
0.083333 0.166667 0.083333 0.666667
18+
URL http://jaspar.genereg.net/matrix/MA0011.2
19+

tests/test_patterns.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,39 @@
1-
from dnachisel.SequencePattern import SequencePattern
1+
import pytest
2+
from pathlib import Path
3+
4+
from dnachisel import SequencePattern, MotifPssmPattern
5+
6+
7+
@pytest.fixture
8+
def test_single_motif_filepath():
9+
return str(Path(__file__).parent / 'data' / 'single_motif.meme.txt')
10+
11+
12+
@pytest.fixture
13+
def test_multiple_motif_filepath():
14+
return str(Path(__file__).parent / 'data' / 'multiple_motifs.meme.txt')
15+
216

317
def test_patterns_from_string():
418
pattern = SequencePattern.from_string("6xT")
519
assert pattern.expression == "TTTTTT"
620
pattern = SequencePattern.from_string("BsmBI_site")
721
assert pattern.expression == "CGTCTC"
822
pattern = SequencePattern.from_string("5x2mer")
9-
assert pattern.expression == '([ATGC]{2})\\1{4}'
23+
assert pattern.expression == '([ATGC]{2})\\1{4}'
24+
25+
26+
def test_pssm_pattern_from_file(
27+
test_single_motif_filepath, test_multiple_motif_filepath
28+
):
29+
single_pattern = MotifPssmPattern.list_from_file(
30+
test_single_motif_filepath, "minimal", relative_threshold=0.9
31+
)
32+
assert len(single_pattern) == 1
33+
assert all([isinstance(p, MotifPssmPattern) for p in single_pattern])
34+
35+
multiple_patterns = MotifPssmPattern.list_from_file(
36+
test_multiple_motif_filepath, "minimal", relative_threshold=0.9
37+
)
38+
assert len(multiple_patterns) == 2
39+
assert all([isinstance(p, MotifPssmPattern) for p in multiple_patterns])

0 commit comments

Comments
 (0)