-
Notifications
You must be signed in to change notification settings - Fork 45
/
shuffleDataAmongSubjects_2d.py
99 lines (79 loc) · 3.4 KB
/
shuffleDataAmongSubjects_2d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import numpy as np
import h5py
import os
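'''
Group data (patches) from several subjects into shared batch files.
Patches are concatenated in file order; no random permutation is applied here.
input:
    save_dir: the directory holding the per-subject h5 files
              (only names containing both '.h5' and 'train' are used)
    savepath: the directory where the merged h5 batch files are written
output:
    h5 files named train5x64x64_<batchID>.h5, each merging up to 5 input files
    and holding the datasets dataLPET, dataCT and dataHPET
'''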
def _writeBatch(outpath, lpetParts, ctParts, hpetParts):
    """Concatenate the buffered per-file patches along axis 0 and write them
    to a new h5 file at outpath (datasets: dataLPET, dataCT, dataHPET)."""
    with h5py.File(outpath, 'w') as hf:
        hf.create_dataset('dataLPET', data=np.concatenate(lpetParts, axis=0))
        hf.create_dataset('dataCT', data=np.concatenate(ctParts, axis=0))
        hf.create_dataset('dataHPET', data=np.concatenate(hpetParts, axis=0))


def shuffleDataAmongSubjects(save_dir, savepath, numInOneSub=5,
                             savefilename='train5x64x64_'):
    """Merge per-subject patch files into larger batch h5 files.

    Every file in save_dir whose name contains both '.h5' and 'train' is
    read; the datasets 'dataLPET', 'dataCT' and 'dataHPET' of each group of
    numInOneSub consecutive files are concatenated along the first axis and
    written to <savepath>/<savefilename><batchID>.h5 (batchID starts at 1).
    A trailing, incomplete group is written too, provided it holds at least
    one patch.

    NOTE: despite the function name, no random permutation is applied;
    patches keep the order in which the files are visited.

    input:
        save_dir: directory holding the input h5 files
        savepath: directory receiving the batch files (created if missing)
        numInOneSub: number of input files merged into one batch file
        savefilename: file name prefix of the batch files
    output:
        None; the batch files are written to savepath
    raises:
        ValueError: if numInOneSub < 1, if the three datasets of one file
            disagree on the number of patches, or if patch shapes differ
            between the files of one group
    """
    if numInOneSub < 1:
        raise ValueError('numInOneSub must be at least 1')

    # os.listdir returns names in arbitrary order; sorting makes the content
    # of every batch file reproducible between runs and machines.
    allfilenames = sorted(name for name in os.listdir(save_dir)
                          if '.h5' in name and 'train' in name)

    if not os.path.isdir(savepath):
        os.makedirs(savepath)

    # Per-file arrays are buffered in lists and concatenated on write, so no
    # fixed-size (and fixed-shape) scratch buffer has to be pre-allocated.
    lpetParts, ctParts, hpetParts = [], [], []
    pendingNum = 0  # patches buffered since the last write
    batchID = 0

    for filename in allfilenames:
        filepath = os.path.join(save_dir, filename)
        print('*******path is  {}'.format(filepath))

        # Inputs are only read, so open them read-only.
        with h5py.File(filepath, 'r') as h5f:
            # Stored as float16, the dtype used for the output datasets.
            dLPET = np.asarray(h5f['dataLPET'][:], dtype=np.float16)
            dCT = np.asarray(h5f['dataCT'][:], dtype=np.float16)
            dHPET = np.asarray(h5f['dataHPET'][:], dtype=np.float16)

        unitNum = dLPET.shape[0]
        print('unitNum:  {} dLPET shape:  {}'.format(unitNum, dLPET.shape))

        # The three datasets are indexed in parallel; refuse to write
        # misaligned samples.
        if dCT.shape[0] != unitNum or dHPET.shape[0] != unitNum:
            raise ValueError(
                'patch count mismatch in {}: dataLPET={}, dataCT={}, '
                'dataHPET={}'.format(filepath, unitNum, dCT.shape[0],
                                     dHPET.shape[0]))

        lpetParts.append(dLPET)
        ctParts.append(dCT)
        hpetParts.append(dHPET)
        pendingNum += unitNum

        if len(lpetParts) == numInOneSub:
            batchID += 1
            outpath = os.path.join(savepath,
                                   savefilename + '{}.h5'.format(batchID))
            _writeBatch(outpath, lpetParts, ctParts, hpetParts)
            print('wrote {} with {} patches'.format(outpath, pendingNum))
            ############ initialization ###############
            lpetParts, ctParts, hpetParts = [], [], []
            pendingNum = 0

    # Flush the files left over after the last complete group.
    if pendingNum != 0:
        batchID += 1
        outpath = os.path.join(savepath,
                               savefilename + '{}.h5'.format(batchID))
        _writeBatch(outpath, lpetParts, ctParts, hpetParts)
        print('wrote {} with {} patches'.format(outpath, pendingNum))
    return
def main():
    """Merge the 2D training patch files under the hard-coded data root.

    Reads from <basePath>/train2D_H5/ and writes the batch files to
    <basePath>/trainBatch2D_H5/.
    """
    # Both directories are derived from one root so they cannot drift apart.
    basePath = '/home/niedong/Data4LowDosePET/h5DataAug_noNorm/'
    path = basePath + 'train2D_H5/'
    savepath = basePath + 'trainBatch2D_H5/'
    shuffleDataAmongSubjects(path, savepath)
if __name__ == "__main__":
    # Expose only GPU 0 to this process before running the job.
    # NOTE(review): nothing visible in this file uses a GPU; presumably this
    # setting is carried over from sibling scripts - confirm before removing.
    os.environ.update({'CUDA_VISIBLE_DEVICES': '0'})
    main()