forked from xuyk/Rosalind
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path073_SGRA.py
67 lines (52 loc) · 2.2 KB
/
073_SGRA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python
'''
A solution to a ROSALIND bioinformatics problem.
Problem Title: Using the Spectrum Graph to Infer Peptides
Rosalind ID: SGRA
Rosalind #: 073
URL: http://rosalind.info/problems/sgra/
'''
from scripts import ProteinWeightDict
from Textbook_05D import topological_ordering
def find_weight_match(approx_weight, error):
for item in ProteinWeightDict().items():
if abs(item[1] - approx_weight) < error:
return item[0]
return None
def infer_longest_peptide(masses):
'''Returns the longest protein string that matches the spectrum graph of the given masses.'''
# Build the graph from the given masses.
graph = dict()
protein_weight_dict = ProteinWeightDict()
for i in xrange(len(masses)):
for j in xrange(i+1, len(masses)):
# Break the inner loop if we've exceeded the maximum weight.
if masses[j] - masses[i] > max(protein_weight_dict.values()) + 1:
break
# Check if the weight associated with masses i and j approximately matches a known protein.
temp_protein = find_weight_match(masses[j] - masses[i], 0.001)
if temp_protein is not None:
graph[masses[i], masses[j]] = temp_protein
# Get the topological ordering of the graph.
top_order = topological_ordering(graph.keys())
# Build the longest path to each node.
S = {node: '' for node in top_order}
for node in top_order:
for predecessor in map(lambda n: n[0], filter(lambda e: e[1] == node, graph.keys())):
if len(S[predecessor]) + 1 > len(S[node]):
S[node] = S[predecessor] + graph[(predecessor, node)]
# Return the longest path.
return max(S.values(), key=len)
def main():
'''Main call. Reads, runs, and saves problem specific data.'''
# Read the input data.
with open('data/rosalind_sgra.txt') as input_data:
masses = [float(line.strip()) for line in input_data.readlines()]
# Get the longeset peptide.
peptide = infer_longest_peptide(masses)
# Print and save the answer.
print peptide
with open('output/073_SGRA.txt', 'w') as output_data:
output_data.write(peptide)
if __name__ == '__main__':
main()