-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathorderforheatmapwithdeletion.py
119 lines (81 loc) · 3.28 KB
/
orderforheatmapwithdeletion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 28 13:44:39 2019
@author: mjsf3
"""
import numpy as np
import pandas as pd
import sys
import os
import time
# This script reads a CSV file into a pandas DataFrame and then converts that DataFrame to a numpy array.
# Intensity value below which a trace is considered to have dropped.
_HALF_INTENSITY = 0.5
# Number of (non-NaN) samples after the drop that are ignored before a value
# above the threshold counts as a reappearance.
_GRACE_SAMPLES = 5


def _trace_lifetimes(data, threshold=_HALF_INTENSITY):
    """Return, per column of *data*, the row index of the first value below *threshold*.

    A column that never goes below *threshold* gets the total number of rows.
    NaN entries never compare as "below", so they do not end a trace.
    """
    lifetimes = []
    for trace in data.T:
        below = np.flatnonzero(trace < threshold)
        lifetimes.append(below[0] if below.size else trace.shape[0])
    # Explicit integer dtype so the result is usable as an index even when
    # there are no columns at all.
    return np.array(lifetimes, dtype=int)


def _reappearing_traces(data, lifetimes, threshold=_HALF_INTENSITY,
                        grace=_GRACE_SAMPLES):
    """Return the column indices whose intensity rises above *threshold* again after dropping.

    For each column, the part of the trace from its lifetime index onwards is
    expected to stay below *threshold*.  NaN values are discarded, the first
    *grace* remaining samples are skipped, and the column is flagged if any
    later sample exceeds *threshold*.  Columns that never dropped have an
    empty tail and are therefore never flagged.
    """
    flagged = []
    for col, lifetime in enumerate(lifetimes):
        tail = data[lifetime:, col]
        tail = tail[~np.isnan(tail)]
        if np.any(tail[grace:] > threshold):
            flagged.append(col)
    # dtype=int matters: an empty float array is rejected as an index by
    # np.delete in recent NumPy releases.
    return np.array(flagged, dtype=int)


def main(argv=None):
    """Sort the traces of a CSV by lifetime, dropping traces that reappear.

    Usage: <script> <dir> <filepath> [<output file path>]

    The first CSV column is treated as the frame number, every other column
    as one intensity trace.  Traces in which a vesicle seems to reappear
    after the initial one has burst/moved are deleted; the remaining traces
    are written to ``<dir>/<output file path>`` ordered by decreasing
    lifetime.

    Parameters
    ----------
    argv : list of str, optional
        Argument vector laid out like ``sys.argv`` (program name first).
        Defaults to ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) > 3:
        directory, filepath, output = argv[1], argv[2], argv[3]
    elif len(argv) == 3:
        directory, filepath = argv[1], argv[2]
        output = 'output_' + filepath
    else:
        directory = '/storage/All data- for analysis/Control/2'
        filepath = 'collatefiltered.csv'
        # Previously left undefined in this branch, which made the final
        # to_csv call fail with a NameError; use the same default naming
        # as the two-argument form.
        output = 'output_' + filepath

    df = pd.read_csv(directory + '/' + filepath)
    labels = np.array(list(df.columns))
    data = df.to_numpy(float)
    frames = data[:, 0]
    print(data.shape)
    data = data[:, 1:]
    labels = labels[1:]

    lifetimes = _trace_lifetimes(data)

    # Delete traces where a vesicle has reappeared in the view of the
    # intensity extractor after the initial vesicle has burst/moved.
    # These indices are relative to the trace columns only (the frame
    # column has already been split off).
    to_remove = _reappearing_traces(data, lifetimes)
    print(labels[to_remove])
    lifetimes = np.delete(lifetimes, to_remove)
    data = np.delete(data, to_remove, axis=1)
    labels = np.delete(labels, to_remove)

    # Longest-lived traces first.
    order = np.argsort(lifetimes)[::-1]
    # The label row and the numeric rows share one array, so the numbers are
    # cast to text explicitly rather than relying on implicit promotion.
    table = np.vstack((labels[order], data[:, order].astype(str)))
    first_column = np.concatenate((['frame'], frames.astype(str)))
    print(first_column.shape)
    print(table.shape)
    table = np.hstack((first_column.reshape((first_column.shape[0], 1)), table))
    df2 = pd.DataFrame(table)

    # Best-effort removal of leftover index columns: first both names, then
    # only the first one.  Only the "label not found" error is tolerated.
    # NOTE(review): df2 is built without column names, so its columns are
    # integers and these lookups look like they can never match - confirm
    # whether the label row was meant to be used as the header instead.
    for unwanted in (['Unnamed: 0', 'Unnamed: 0.1'], 'Unnamed: 0'):
        try:
            df2 = df2.drop(labels=unwanted, axis=1)
        except KeyError:
            continue
        break

    print(df2)
    df2.to_csv(directory + '/' + output)


if __name__ == '__main__':
    main()