-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_motifs_combinations.py
102 lines (85 loc) · 4.12 KB
/
get_motifs_combinations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import os
from collections import Counter
from itertools import combinations, product

import numpy as np
def get_seqs_motifs(dict_motifs, motif, indexes_match_list):
    """Fill the wildcard positions of ``motif`` with each given combination.

    For every combination found in the values of ``dict_motifs`` a copy of
    ``motif`` is produced in which the positions listed in
    ``indexes_match_list`` are overwritten, in ascending position order, by
    the successive elements of the combination.  All other characters of
    ``motif`` are kept as they are.

    Args:
        dict_motifs: Mapping whose values are iterables of combinations
            (tuples or strings of replacement characters).  The keys are
            not used.
        motif: Template string containing the positions to fill.
        indexes_match_list: Container of integer positions in ``motif`` to
            fill (a list or a 1-D NumPy array both work).  Positions outside
            ``range(len(motif))`` are ignored.

    Returns:
        A list with one filled-in string per combination, in the iteration
        order of ``dict_motifs`` and of its values.

    Raises:
        IndexError: If a combination has fewer elements than there are
            positions to fill.
    """
    # The set of positions to fill does not depend on the combination, so it
    # is resolved once here instead of once per character per combination.
    positions = [i for i in range(len(motif)) if i in indexes_match_list]

    list_seqs = []
    for value in dict_motifs.values():
        for v in value:
            chars = list(motif)
            for c, position in enumerate(positions):
                chars[position] = v[c]
            list_seqs.append("".join(chars))
    return list_seqs
def get_seqs_motifs_mod(dict_motifs, list_seqs, base):
    """Substitute an ambiguity code in every sequence with concrete bases.

    Each combination found in the values of ``dict_motifs`` is applied to
    each sequence of ``list_seqs``: the k-th occurrence of ``base`` in the
    sequence is replaced by the k-th element of the combination.  When the
    combination has fewer elements than the sequence has occurrences, the
    remaining occurrences receive the combination's first element, so
    one-element combinations replace every occurrence with the same base.

    Args:
        dict_motifs: Mapping whose values are iterables of non-empty
            combinations (tuples or strings of replacement bases).  The keys
            are not used.
        list_seqs: Strings that may contain ``base``.
        base: Non-empty placeholder to substitute, e.g. ``"B"``.

    Returns:
        A new list holding one string per (combination, sequence) pair,
        grouped by combination.  Sequences that do not contain ``base`` are
        copied unchanged.

    Raises:
        IndexError: If a combination is empty.
        ValueError: If ``base`` is an empty string and there is at least one
            sequence to process.
    """
    update = []
    for value in dict_motifs.values():
        for v in value:
            fallback = v[0]
            for list_seq in list_seqs:
                # Splitting on the placeholder leaves the fixed fragments;
                # the gap after fragment k is the k-th occurrence of `base`.
                fragments = list_seq.split(base)
                rebuilt = [fragments[0]]
                for k, fragment in enumerate(fragments[1:]):
                    rebuilt.append(v[k] if k < len(v) else fallback)
                    rebuilt.append(fragment)
                update.append("".join(rebuilt))
    return update
# Motifs to expand.  Besides A/C/G/T they contain the ambiguity codes
# N, B, D, V, Y and R, which are replaced by concrete bases below.
motifs_list = ["CACNNNNNNNTNGC","GCNANNNNNNNGTG","GATNNNNCTC","DGAGNNNNATC","TCABNNNNNNTARG","CYTANNNNNNVTGA"]

# Ambiguity codes handled after N, with the bases each one expands to,
# listed in the order in which they are substituted.
ambiguity_codes = (
    ("B", "CGT"),
    ("D", "AGT"),
    ("V", "ACG"),
    ("Y", "CT"),
    ("R", "AG"),
)

# Maps every motif to the list of sequences obtained by expanding its codes.
all_motifs_dict = {}
for motif in motifs_list:
    # Start from the motif itself so that a motif without any N still
    # expands (to itself, or to its B/D/V/Y/R variants) instead of to nothing.
    list_seqs = [motif]

    indexes_match_list = [i for i, letter in enumerate(motif) if letter == "N"]
    if indexes_match_list:
        # product() yields each of the 4**n fillings exactly once and in a
        # fixed order, so no de-duplication through a set is needed and the
        # result order is the same on every run.
        dict_motifs = {motif: list(product("ACGT", repeat=len(indexes_match_list)))}
        list_seqs = get_seqs_motifs(dict_motifs, motif, indexes_match_list)

    for code, alphabet in ambiguity_codes:
        occurrences = motif.count(code)
        if occurrences:
            dict_motifs = {motif: list(product(alphabet, repeat=occurrences))}
            list_seqs = get_seqs_motifs_mod(dict_motifs, list_seqs, code)

    all_motifs_dict[motif] = list_seqs
# Write one "<motif>.txt" file per motif into the current working directory.
# Every expanded sequence becomes a two-line record: a ">" header line that
# repeats the sequence, followed by the sequence itself.
for k, value in all_motifs_dict.items():
    filename = k + ".txt"
    print(filename)
    # A single handle, opened in "w" mode and closed by the with-statement,
    # receives all records; re-opening the file per sequence is unnecessary
    # and would leave the extra handles unclosed.
    with open(filename, "w") as f:
        for seq in value:
            f.write(">" + seq + "\n" + seq + "\n")