# Computing GC Content
# (Web-view navigation text and the line-number gutter that preceded the
# script were not part of the program and have been reduced to this header.)
# Handle on the Rosalind GC dataset, opened for reading; the call at the end
# of the script passes it to readFASTA.
file = open('rosalind_gc.txt', mode="r")
# Module-level placeholders. readFASTA binds its own local `name` and `seqs`,
# so these globals are not the ones it fills in.
name, seqs, s = None, {}, ''
def readFASTA(file):
    """Parse FASTA-formatted lines into a dictionary of sequences.

    Args:
        file: Any iterable of text lines (an open text file works). A line
            starting with '>' is a header; every other non-blank line is
            sequence data belonging to the most recent header.

    Returns:
        A dict mapping each header (without the leading '>') to its sequence,
        with the sequence lines concatenated in order. A header that appears
        more than once keeps only the data following its last occurrence.

    Raises:
        KeyError: If sequence data appears before the first header line.
    """
    chunks = {}  # header -> list of sequence lines, joined once at the end
    name = None
    for line in file:
        # Discard the newline (and any other trailing whitespace).
        line = line.rstrip()
        if not line:
            # Blank lines (e.g. a trailing empty line) carry no data; indexing
            # line[0] on them used to raise IndexError.
            continue
        if line.startswith('>'):
            # Header: drop the leading '>' and start a fresh record.
            name = line[1:]
            chunks[name] = []
        else:
            # Sequence data for the current record.
            chunks[name].append(line)
    return {header: ''.join(parts) for header, parts in chunks.items()}
def GCcontent(dict):
    """Print the record with the highest GC content, then that percentage.

    The GC content of a sequence is the share of its characters that are
    uppercase 'G' or 'C', expressed as a percentage rounded to 6 decimals.

    Args:
        dict: Mapping of record name to sequence string. (The parameter name
            shadows the builtin; it is kept so existing callers still work.)

    Returns:
        None. Two lines are written to stdout: the winning record's name and
        its GC percentage. If no record has a GC content above zero (including
        an empty mapping), the initial values 0 and 0 are printed instead.
    """
    sequences = dict
    best = [0, 0]  # [record name, GC percentage] of the best record so far
    for name, seq in sequences.items():
        if not seq:
            # An empty sequence has no defined GC ratio (division by zero) and
            # could never beat the running best, so skip it.
            continue
        gc = round((seq.count("C") + seq.count("G")) / len(seq) * 100, 6)
        if gc > best[1]:
            best = [name, gc]
    # Output format expected by Rosalind: name on one line, percentage on the next.
    print(best[0])
    print(best[1])
    return
# Run the pipeline: parse the FASTA input and report the record with the
# highest GC content. GCcontent prints its own result and returns None, so its
# return value is deliberately not printed (doing so emitted a stray "None"
# line after the answer).
try:
    GCcontent(readFASTA(file))
finally:
    # Release the input handle opened at the top of the script.
    file.close()