Skip to content

Commit

Permalink
Refactor: create KMerSpace
Browse files Browse the repository at this point in the history
  • Loading branch information
slowikj committed Jan 18, 2020
1 parent cfbe0f2 commit 43aacbb
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 101 deletions.
54 changes: 11 additions & 43 deletions kmer_counter.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,19 @@
from Bio.Seq import reverse_complement

from kmer_space import KMerSpace

class KmerCounter:

def __init__(self, seq: str, alphabet=None):
if alphabet is None:
alphabet = {"A", "C", "T", "G"}
self.seq = seq.upper()
self.alphabet = alphabet
class KMerCounter:

def get_kmer_frequencies_sorted_list(self, k):
res_dict = self.get_kmer_frequencies(k)
return sorted(list(res_dict.items()), key=lambda x: x[0])
def __init__(self, kmer_space: KMerSpace):
self.kmer_space = kmer_space

def get_kmer_frequencies(self, k):
res = self.get_kmer_counts(k)
for key in res:
res[key] /= len(self.seq)
return res
def count(self, seq):
seq = seq.upper()

def get_kmer_counts(self, k):
kmers = dict()
self.__update_dict_with_all_kmers(k, kmers)
k = self.kmer_space.k
for i in range(len(seq) - k + 1):
kmer = seq[i:(i + k)]
self.kmer_space.increase(kmer)

for i in range(len(self.seq) - k + 1):
current_kmer = self.seq[i:(i + k)]
self.__insert_kmer(current_kmer, kmers)

return kmers

def __insert_kmer(self, current_kmer, kmers):
reversed_complement_kmer = reverse_complement(current_kmer)
if current_kmer in kmers:
kmers[current_kmer] += 1
elif reversed_complement_kmer in kmers:
kmers[reversed_complement_kmer] += 1
else:
kmers[current_kmer] = 1

def __update_dict_with_all_kmers(self, k, kmer_dict):
self.__generate_all_kmers_rec(k, "", kmer_dict)

def __generate_all_kmers_rec(self, k, current_kmer, kmer_dict):
if len(current_kmer) == k:
if not (current_kmer in kmer_dict or reverse_complement(current_kmer) in kmer_dict):
kmer_dict[current_kmer] = 0
return

for elem in self.alphabet:
self.__generate_all_kmers_rec(k, current_kmer + elem, kmer_dict)
return self.kmer_space.get_sorted_by_kmer()
34 changes: 34 additions & 0 deletions kmer_space.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import itertools

from Bio.Seq import reverse_complement


class KMerSpace:
    """Counters for every k-mer of length ``k`` over ``alphabet``.

    A k-mer and its reverse complement (as computed by
    ``Bio.Seq.reverse_complement``) share a single counter: only the
    orientation that is generated first is stored as a key in
    ``kmer_dict``; the other orientation is resolved to it on access.
    """

    def __init__(self, k, alphabet):
        """Build the space with every counter initialised to zero.

        :param k: length of the k-mers.
        :param alphabet: iterable of single-letter symbols.
        """
        self.k = k
        self.alphabet = alphabet
        self.kmer_dict = self.__create_kmer_dict(k, alphabet)

    def increase(self, kmer, value=1):
        """Add ``value`` to the counter shared by ``kmer`` and its reverse complement.

        :raises KeyError: if neither orientation belongs to the space.
        """
        self.kmer_dict[self.__stored_key(kmer)] += value

    def get_sorted_by_kmer(self):
        """Return the ``(kmer, count)`` pairs as a list ordered by k-mer."""
        return sorted(self.kmer_dict.items(), key=lambda item: item[0])

    def get_value(self, kmer):
        """Return the counter shared by ``kmer`` and its reverse complement.

        :raises KeyError: if neither orientation belongs to the space.
        """
        return self.kmer_dict[self.__stored_key(kmer)]

    def __stored_key(self, kmer):
        """Return the orientation of ``kmer`` that is used as a dictionary key."""
        if kmer in self.kmer_dict:
            return kmer
        complement = reverse_complement(kmer)
        if complement in self.kmer_dict:
            return complement
        # Report the k-mer the caller passed, not its reverse complement.
        raise KeyError(
            f"k-mer {kmer!r} (and its reverse complement) is not in the k-mer space"
        )

    @staticmethod
    def __create_kmer_dict(k, alphabet):
        """Create the zero-initialised dictionary of stored k-mers.

        A k-mer is skipped when its reverse complement has already been
        stored, so each pair is represented by exactly one key.
        """
        res_dict = {}
        # Stream the candidates instead of materialising all
        # len(alphabet) ** k strings up front.
        for letters in itertools.product(alphabet, repeat=k):
            kmer_key = "".join(letters)
            if reverse_complement(kmer_key) not in res_dict:
                res_dict[kmer_key] = 0
        return res_dict
58 changes: 0 additions & 58 deletions tests/kmer_counter_tests.py

This file was deleted.

19 changes: 19 additions & 0 deletions tests/kmer_space_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import unittest

from kmer_space import KMerSpace

# Alphabet passed to every KMerSpace constructed in the tests below.
common_alphabet = ["A", "C", "T", "G"]


class KMerSpaceTest(unittest.TestCase):
    """Unit tests for ``KMerSpace`` built with k=4 over ``common_alphabet``."""

    def test_initialization(self):
        """A freshly built space exposes 136 (kmer, count) entries."""
        space = KMerSpace(k=4, alphabet=common_alphabet)
        entries = space.get_sorted_by_kmer()
        self.assertEqual(len(entries), 136)

    def test_increase_1_a_key_that_is_explicitly_in_dict(self):
        """Increasing "ACTG" once makes its reported value 1."""
        space = KMerSpace(k=4, alphabet=common_alphabet)
        target = "ACTG"
        space.increase(target)
        observed = space.get_value(target)
        self.assertEqual(observed, 1)

2 changes: 2 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def is_kmer_list_sorted(res):
    """Return True when the first items of ``res`` are strictly increasing.

    ``res`` is a sequence of indexable entries (e.g. ``(kmer, count)``
    tuples); each entry's first item is compared with its successor's.
    An empty or single-entry sequence counts as sorted.
    """
    for current, following in zip(res, res[1:]):
        if not current[0] < following[0]:
            return False
    return True

0 comments on commit 43aacbb

Please sign in to comment.