-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_TR_cn.py
executable file
·93 lines (73 loc) · 2.08 KB
/
get_TR_cn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import sys
import re
def remove_overlapping_intervals(intervals):
result = []
current_interval = intervals[0]
for interval in intervals[1:]:
# Check for overlap
if interval[0] < current_interval[1]:
# Overlapping intervals, keep the longest one
current_interval = max(current_interval, interval, key=lambda x: x[1] - x[0])
else:
# Non-overlapping intervals, add the current interval to the result
result.append(current_interval)
current_interval = interval
# Add the last interval to the result
result.append(current_interval)
return result
paf_f = open(sys.argv[1])
min_start = -1
max_end = -1
unit_len = 1
tot_aln_block = 0
for line in paf_f:
paf_line = line.strip().split('\t')
start, end = float(paf_line[2]), float(paf_line[3])
unit_len = float(paf_line[6])
if min_start == -1:
min_start = start
if max_end == -1:
max_end = end
min_start = min(min_start, start)
max_end = max(max_end, end)
# print(min_start, max_end)
window = max_end - min_start
est_cn = round((max_end - min_start) / unit_len)
# print(est_cn)
paf_f = open(sys.argv[1])
units = []
for line in paf_f:
paf_line = line.strip().split('\t')
start, end, aln_block = float(paf_line[2]), float(paf_line[3]), float(paf_line[10])
unit_len = float(paf_line[6])
units.append((start, end, aln_block))
if len(units) == 0:
print(est_cn)
print(0)
exit()
tot_units = 0
units.sort(key=lambda x: x[0])
# print(units)
units = remove_overlapping_intervals(units)
# print(units)
for u in units:
aln_block = u[2]
if aln_block - unit_len > 0:
window -= (aln_block - unit_len)
est_cn = round(window / unit_len)
# print(est_cn)
for i in range(len(units) - 1):
# print(units[i], units[i+1])
# print(units[i+1][0] - units[i][1])
# print(units[i+1][0] - units[i][1] >= unit_len * 0.9)
if units[i+1][0] - units[i][1] >= unit_len * 0.9:
tot_units += 1
# print(tot_units)
tot_units += len(units)
# print(est_cn)
# print(tot_units)
# print(est_cn == tot_units)
if est_cn >= 5 or tot_units >= 5:
print(tot_units)
else:
print(est_cn)