-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathutils.py
More file actions
82 lines (56 loc) · 2.1 KB
/
utils.py
File metadata and controls
82 lines (56 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from itertools import combinations
from collections import Counter
import collections
# Read matrix.tsv when the module is loaded: `matrix` holds one string per
# line of the file, with leading and trailing whitespace removed.
# TODO: check that the lines which must be skipped are actually skipped
# (translated from an Italian note that is truncated in the source and does
# not say which lines -- confirm with the author).
with open('matrix.tsv', 'r') as f:
    matrix = [row.strip() for row in f]
#1. Return the number of rows of the matrix (paths = sequences of the VG).
def num_sequences(matrix):
    """Return how many rows ``matrix`` contains.

    Args:
        matrix: Sized collection with one entry per sequence.

    Returns:
        ``len(matrix)``.
    """
    return len(matrix)
#2. Return the number of columns of the matrix (polymorphic sites of the VG).
def num_segregating_sites(matrix):
    """Return the number of comma-separated fields in the first row.

    Only ``matrix[0]`` is inspected; the other rows are not checked.

    Args:
        matrix: Sequence of comma-separated strings, one per row.

    Returns:
        The field count of the first row (a row without commas counts as
        one field).

    Raises:
        IndexError: If ``matrix`` is empty.
    """
    first_row = matrix[0]
    # n separators delimit n + 1 fields.
    return first_row.count(',') + 1
#3. Return allele frequencies.
def allele_freq(matrix, collections):
    """Return the relative frequency of every symbol within each row.

    Each row is scanned character by character and commas are ignored.
    For every distinct remaining character, its count divided by the
    number of non-comma characters in that row is appended to the result,
    in order of first appearance. The frequencies of all rows end up in
    one flat list.

    NOTE(review): frequencies are computed per row (sequence), not per
    column (site) -- confirm that this is the intended quantity.

    Args:
        matrix: Iterable of comma-separated strings, one per row.
        collections: Object exposing a ``Counter`` class; the caller in
            this file passes the standard ``collections`` module.

    Returns:
        A flat list of floats.
    """
    frequencies = []
    for row in matrix:
        symbols = [ch for ch in row if ch != ',']
        tally = collections.Counter(symbols)
        row_total = sum(tally.values())
        # An empty row yields an empty tally, so no division happens.
        frequencies.extend(n / row_total for n in tally.values())
    return frequencies
#4. Return the total number of pairwise differences observed across all pairs of sequences.
def count_differences(matrix):
    """Return the total number of differing positions over all row pairs.

    Every unordered pair of rows is compared character by character.
    A position is skipped when the second row of the pair holds a comma
    there; otherwise it adds one to the total when the two characters
    differ.

    Rows of unequal length are compared only up to the length of the
    shorter one.

    Args:
        matrix: Iterable of comma-separated strings, one per row.

    Returns:
        The summed count of differing positions as an int; 0 when there
        are fewer than two rows.
    """
    total = 0
    # combinations() is consumed lazily; no need to store every pair.
    for first, second in combinations(matrix, 2):
        total += sum(
            1
            for left, right in zip(first, second)
            if ',' not in right and left != right
        )
    return total
#5. Return the total number of pairwise differences between the sequences divided by the number of DNA sequences sampled.
def avg_num_pairwise_differences(matrix):
    """Return ``count_differences(matrix)`` divided by ``num_sequences(matrix)``.

    NOTE(review): the divisor is the number of sequences, not the number
    of sequence pairs -- confirm that this is the intended average.

    Args:
        matrix: Rows as accepted by ``count_differences`` and
            ``num_sequences``.

    Returns:
        The quotient as a float.

    Raises:
        ZeroDivisionError: If ``num_sequences(matrix)`` is 0.
    """
    total_differences = count_differences(matrix)
    sample_size = num_sequences(matrix)
    return total_differences / sample_size
def main():
    """Print each statistic of the module-level ``matrix``, one per line.

    The values are computed and printed one after another, in this order:
    number of sequences, number of segregating sites, total pairwise
    differences, allele frequencies, average pairwise differences.
    """
    sequence_count = num_sequences(matrix)
    print(sequence_count)

    site_count = num_segregating_sites(matrix)
    print(site_count)

    difference_total = count_differences(matrix)
    print(difference_total)

    frequencies = allele_freq(matrix, collections)
    print(frequencies)

    average_differences = avg_num_pairwise_differences(matrix)
    print(average_differences)


# Run the report only when the file is executed as a script.
if __name__ == "__main__":
    main()