Skip to content

Commit

Permalink
Generate all k-mers first + refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
slowikj committed Jan 18, 2020
1 parent a630531 commit cfbe0f2
Show file tree
Hide file tree
Showing 11 changed files with 30 additions and 20 deletions.
Empty file modified .gitignore
100644 → 100755
Empty file.
Empty file modified .idea/.gitignore
100644 → 100755
Empty file.
Empty file modified .idea/.name
100644 → 100755
Empty file.
Empty file modified .idea/enhancer-classifier.iml
100644 → 100755
Empty file.
Empty file modified .idea/inspectionProfiles/profiles_settings.xml
100644 → 100755
Empty file.
Empty file modified .idea/misc.xml
100644 → 100755
Empty file.
Empty file modified .idea/modules.xml
100644 → 100755
Empty file.
Empty file modified .idea/vcs.xml
100644 → 100755
Empty file.
Empty file modified README.md
100644 → 100755
Empty file.
9 changes: 3 additions & 6 deletions kmer_counter.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
from Bio.Seq import Seq


# Return the reverse complement of `current_kmer`, computed by wrapping it in
# a Bio.Seq.Seq and calling that object's reverse_complement() method.
# NOTE(review): the line right after this def imports a function of the same
# name from Bio.Seq; presumably this def is the pre-commit helper that the
# import replaces (the diff's +/- markers are not visible here) -- confirm.
def reverse_complement(current_kmer):
return Seq(current_kmer).reverse_complement()
from Bio.Seq import reverse_complement


class KmerCounter:
Expand All @@ -25,11 +21,12 @@ def get_kmer_frequencies(self, k):

# Build and return a dict of k-mer counts for self.seq.
# Every window of length k (start indices 0 .. len(self.seq) - k) is handed to
# __insert_kmer together with the dict, and __update_dict_with_all_kmers(k, kmers)
# is applied to the same dict.
# NOTE(review): this listing is a diff rendered without +/- markers, so the
# __update_dict_with_all_kmers call shows up both before and after the loop.
# Given the commit title ("Generate all k-mers first"), the leading call is
# presumably the added line and the trailing one the removed line -- confirm.
def get_kmer_counts(self, k):
kmers = dict()
self.__update_dict_with_all_kmers(k, kmers)

# Slide a window of width k over the sequence; the range is empty when the
# sequence is shorter than k.
for i in range(len(self.seq) - k + 1):
current_kmer = self.seq[i:(i + k)]
self.__insert_kmer(current_kmer, kmers)

self.__update_dict_with_all_kmers(k, kmers)
return kmers

def __insert_kmer(self, current_kmer, kmers):
Expand Down
41 changes: 27 additions & 14 deletions tests/kmer_counter_tests.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import unittest

from kmer_counter import KmerCounter

from Bio.Seq import reverse_complement

# True when the first elements of consecutive entries in `res` are strictly
# increasing (vacuously True when `res` has fewer than two entries).
# NOTE(review): an identical definition appears again near the end of this
# test module listing; presumably this earlier copy is the one the commit
# moved away (the diff's +/- markers are not visible here) -- confirm.
def is_kmer_list_sorted(res):
return all(res[i][0] < res[i + 1][0] for i in range(len(res) - 1))
from kmer_counter import KmerCounter


class MyTestCase(unittest.TestCase):
Expand All @@ -19,27 +17,42 @@ def test_seq_of_one_kmer_gives_count_1_only_for_one_entry(self):

# Exercise get_kmer_counts(k=4) on the 5-letter input "ACTAA": the result must
# hold 136 entries and a count of 1 for each window, "ACTA" and "CTAA".
# NOTE(review): this listing is a diff rendered without +/- markers, so the
# `kmer` / `res[...]` lines and the `seq` / `get_value_for_kmer` lines are
# presumably the before and after versions of the same statements -- confirm
# which set belongs to the committed file.
def test_seq_with_2_different_kmers(self):
k = 4
kmer = "ACTAA"
res = KmerCounter(kmer).get_kmer_counts(k)
seq = "ACTAA"
res = KmerCounter(seq).get_kmer_counts(k)
# NOTE(review): 136 presumably is the number of distinct 4-mers once a k-mer
# and its reverse complement share one entry ((4**4 + 16) / 2) -- verify
# against KmerCounter.
self.assertEqual(len(res), 136)
self.assertEqual(res["ACTA"], 1)
self.assertEqual(res["CTAA"], 1)
kmer1, kmer2 = "ACTA", "CTAA"
# get_value_for_kmer falls back to the reverse-complement key when the k-mer
# itself is not a key of the result.
self.assertEqual(get_value_for_kmer(res, kmer1), 1)
self.assertEqual(get_value_for_kmer(res, kmer2), 1)

# Exercise get_kmer_frequencies(k=4) on "ACTAA": the result must hold 136
# entries, and both "ACTA" and "CTAA" must map to 1 / 5 (one over the length
# of the 5-letter input string).
# NOTE(review): this listing is a diff rendered without +/- markers, so the
# `kmer` / `res[...]` lines and the `seq` / `get_value_for_kmer` lines are
# presumably the before and after versions of the same statements -- confirm
# which set belongs to the committed file.
def test_kmer_frequencies_with_2_different_kmers(self):
k = 4
kmer = "ACTAA"
res = KmerCounter(kmer).get_kmer_frequencies(k)
seq = "ACTAA"
kmer1, kmer2 = "ACTA", "CTAA"
res = KmerCounter(seq).get_kmer_frequencies(k)
self.assertEqual(len(res), 136)
self.assertEqual(res["ACTA"], 1 / len(kmer))
self.assertEqual(res["CTAA"], 1 / len(kmer))
# get_value_for_kmer falls back to the reverse-complement key when the k-mer
# itself is not a key of the result.
self.assertEqual(get_value_for_kmer(res, kmer1), 1 / len(seq))
self.assertEqual(get_value_for_kmer(res, kmer2), 1 / len(seq))

# Exercise get_kmer_frequencies_sorted_list(k=4) on "ACTAA": the returned
# sequence must have 136 entries whose first elements are strictly increasing
# (as checked by is_kmer_list_sorted).
# NOTE(review): this listing is a diff rendered without +/- markers, so the
# `kmer` lines and the `seq` lines are presumably the before and after
# versions of the same statements -- confirm which set belongs to the
# committed file.
def test_kmer_frequencies_sorted_list(self):
k = 4
kmer = "ACTAA"
res = KmerCounter(kmer).get_kmer_frequencies_sorted_list(k)
seq = "ACTAA"
res = KmerCounter(seq).get_kmer_frequencies_sorted_list(k)
self.assertEqual(len(res), 136)
self.assertTrue(is_kmer_list_sorted(res)) # check if sorted


def is_kmer_list_sorted(res):
    """Return True if the entries of *res* are in strictly ascending key order.

    The key of an entry is its first element (``entry[0]``). Neighbouring
    keys are compared with ``<``, so two equal adjacent keys make the result
    False. A sequence with fewer than two entries is trivially sorted.
    """
    keys = [entry[0] for entry in res]
    # Pair every key with its successor instead of indexing by position.
    return all(left < right for left, right in zip(keys, keys[1:]))


def get_value_for_kmer(kmers, kmer):
    """Look up *kmer* in *kmers*, falling back to its reverse complement.

    Returns ``kmers[kmer]`` when *kmer* is a key of the mapping; otherwise
    returns the value stored under ``reverse_complement(kmer)``. If that key
    is absent as well, the mapping's own lookup error propagates (``KeyError``
    for a plain dict).
    """
    if kmer in kmers:
        return kmers[kmer]
    # The mapping may be keyed by the opposite-strand spelling of the k-mer.
    return kmers[reverse_complement(kmer)]


# Run this module's tests through unittest when it is executed as a script.
if __name__ == '__main__':
unittest.main()


0 comments on commit cfbe0f2

Please sign in to comment.