# Consensus and Profile
from statistics import mode
# Input handle for 'rosalind_cons.txt'; the driver code at the bottom of this
# file passes it to readFASTA.
file = open('rosalind_cons.txt', mode="r")
# Module-level placeholders. Nothing in the visible code reads them
# (readFASTA rebinds its own local `name` and `seqs`).
s = ''
seqs = {}
name = None
def readFASTA(file):  # this function creates a dictionary from the FASTA file
    """Parse FASTA-formatted lines into a ``{header: sequence}`` dictionary.

    Args:
        file: Any iterable of text lines (an open file, a list of strings...).

    Returns:
        A dict mapping each header (without the leading ``>``) to the
        concatenation of the sequence lines that follow it, in file order.
        A header that appears twice restarts its sequence from empty.

    Raises:
        ValueError: If sequence data appears before the first header line.
    """
    chunks = {}
    name = None
    for line in file:
        # Discard the newline (and any other trailing whitespace).
        line = line.rstrip()
        if not line:
            # Blank lines carry no data; indexing line[0] on them would fail.
            continue
        if line.startswith('>'):
            # Header line: drop the initial '>' and start a new record.
            name = line[1:]
            chunks[name] = []
        else:
            if name is None:
                raise ValueError(
                    "sequence data found before the first FASTA header")
            # Sequence line: collect pieces and join once at the end.
            chunks[name].append(line)
    return {header: ''.join(parts) for header, parts in chunks.items()}
def countingDNA(string):
    """Count the occurrences of each DNA nucleotide in *string*.

    Args:
        string: The sequence to inspect; it must provide a ``count`` method.

    Returns:
        A dict with the keys ``"A"``, ``"C"``, ``"G"`` and ``"T"`` (in that
        order) mapped to how many times each occurs. Other symbols are ignored.
    """
    nucleotide = "ACGT"
    return {base: string.count(base) for base in nucleotide}
def listFASTA(dict):
    """Return the values of a mapping as a list, in iteration order.

    Args:
        dict: A mapping such as the one returned by ``readFASTA``. The
            parameter name shadows the builtin; it is kept so that existing
            keyword callers keep working.

    Returns:
        A new list holding ``dict[key]`` for every key of the mapping.
    """
    return [dict[key] for key in dict]
def most_common(List):
    """Return the most frequent element of *List*.

    Ties are resolved in favour of the element encountered first. This is what
    ``statistics.mode`` does on Python 3.8+; older versions raised
    ``StatisticsError`` on ties, so the count is done with ``Counter`` to get
    the same answer on every version.

    Args:
        List: An iterable of hashable items.

    Returns:
        The item with the highest number of occurrences.

    Raises:
        statistics.StatisticsError: If *List* is empty.
    """
    from collections import Counter
    from statistics import StatisticsError

    ranked = Counter(List).most_common(1)
    if not ranked:
        raise StatisticsError("no mode for empty data")
    return ranked[0][0]
def profile(lDNA):  # it takes a list of sequences of the same length
    """Build the consensus string and the profile matrix of DNA sequences.

    Args:
        lDNA: A list of equal-length sequences (strings, or lists of
            single-character strings).

    Returns:
        A dict with, in this order, the keys ``"consensus"`` (the string made
        of the most frequent symbol of every column; ties go to the symbol
        seen first in the column) and ``"A"``, ``"C"``, ``"G"``, ``"T"``.
        Each base row is a list starting with the label (e.g. ``"A:"``)
        followed by that base's count in every column. An empty *lDNA* yields
        an empty consensus and label-only rows.

    Raises:
        ValueError: If the sequences do not all have the same length.
    """
    from collections import Counter

    d = {"consensus": "",
         "A": ["A:"],
         "C": ["C:"],
         "G": ["G:"],
         "T": ["T:"]}
    # zip() below would silently truncate ragged input, so reject it up front.
    if len({len(seq) for seq in lDNA}) > 1:
        raise ValueError("all sequences must have the same length")
    consensus = []
    for column in zip(*lDNA):
        counts = Counter(column)
        # most_common(1) keeps the first-encountered symbol on ties.
        consensus.append(counts.most_common(1)[0][0])
        for base in "ACGT":
            d[base].append(counts[base])
    d["consensus"] = "".join(consensus)
    return d  # a dictionary holding the consensus string and the profile
# Build the profile from the input file, making sure the handle opened at the
# top of the file is closed even if parsing fails.
try:
    k = profile(listFASTA(readFASTA(file)))
finally:
    file.close()
# Print the result the way Rosalind wants: the consensus string on its own
# line, then each base row as space-separated values.
for label, row in k.items():
    if label == "consensus":
        print(row)
    else:
        print(*row)