-
Notifications
You must be signed in to change notification settings - Fork 0
/
ProcessImages.py
375 lines (321 loc) · 16 KB
/
ProcessImages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
'''
This is the primary script to extract phenotypes from a stack of multiimage tif files. See the README for instructions for how to use this script.
'''
# %% Setup
# Import these libraries (make sure they are installed)
from plantcv import plantcv as pcv
import importlib
import os
from datetime import datetime, timedelta
import cv2 as cv2
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings("ignore", module="matplotlib")
warnings.filterwarnings("ignore", module='plotnine')
# %% Import functions from src/ directory to get snaphots, create masks, and setup image classification
from src.data import import_snapshots
from src.segmentation import createmasks
from src.util import masked_stats
from src.util import strip_whitespace
from src.viz import add_scalebar, custom_colormaps
# %% Setup the io directories
indir = 'diy_data'
outdir = os.path.join('output', 'from_' + indir)
debugdir = os.path.join('debug', 'from_' + indir)
maskdir = os.path.join(outdir, 'masks')
fluordir = os.path.join(outdir, 'fluorescence')
os.makedirs(outdir, exist_ok=True)
os.makedirs(debugdir, exist_ok=True)
os.makedirs(maskdir, exist_ok=True)
os.makedirs(fluordir, exist_ok=True)
# %% pixel resolution (mm)
# This needs to be measured for the camera and working distance. With an ImagingPAM you could punch a leaf and then relate the number of pixels to the physical dimension. See scripts/estimate_area_rgb.py for a method of getting pixel dimensions in ImageJ
pixelresolution = 0.35
# %% Import tif file information based on the filenames. If extract_frames=True it will save each frame form the multiframe TIF to a separate file in data/pimframes/ with a numeric suffix
fdf = import_snapshots.import_snapshots(indir, 'psii', extract_frames=True)
# %% Define the frames from the PSII measurements and merge this information with the filename information
pimframes = pd.read_csv(os.path.join(
indir, 'pimframes_map.csv'), skipinitialspace=True)
pimframes = strip_whitespace.strip_dfwhitespace(pimframes)#this eliminate weird whitespace around any of the character fields
fdf_dark = (pd.merge(fdf.reset_index(),
pimframes,
on=['imageid'],
how='right'))
# %% remove absorptivity measurements (frames 3 and 4) which are blank images in the default protocol
df = (fdf_dark
.query('~parameter.str.contains("Abs") and ~parameter.str.contains("FRon")', engine='python')
)
# %% remove the duplicate Fm and Fo frames where frame = Fmp or Fp (frames 5 and 6)
df = df.query(
'(parameter!="FvFm") or (parameter=="FvFm" and (frame=="Fo" or frame=="Fm") )'
)
# %% Arrange dataframe of metadata so Fv/Fm comes first
param_order = pimframes.parameter.unique()
df['parameter'] = pd.Categorical(df.parameter,
categories=param_order,
ordered=True)
# %% Setup Debug parmaeters
# pcv.params.debug can be 'plot', 'print', or 'None'. 'plot' is useful if you are testing your pipeline over a few samples so you can see each step.
pcv.params.debug = 'plot' # 'print' #'plot', 'None'
plt.rcParams["figure.figsize"] = (9, 9) #Figures will show 9x9inches which fits my monitor well.
plt.rcParams["font.family"] = "Arial" # All text is Arial
# %% The main analysis function
# I like to reload my mask function to make sure it's the latest if I've been optimizing it
# importlib.reload(createmasks)
# This function takes a dataframe of metadata that was created above. We loop through each pair of images to compute photosynthetic parameters
def image_avg(fundf):
# Predefine some variables
global c, h, roi_c, roi_h
# Get the filename for minimum and maximum fluoresence
fn_min = fundf.query('frame == "Fo" or frame == "Fp"').filename.values[0]
fn_max = fundf.query('frame == "Fm" or frame == "Fmp"').filename.values[0]
# Get the parameter name that links these 2 frames
param_name = fundf['parameter'].iloc[0]
# Create a new output filename that combines existing filename with parameter
outfn = os.path.splitext(os.path.basename(fn_max))[0]
outfn_split = outfn.split('-')
basefn = "-".join(outfn_split[0:-1])
outfn_split[-1] = param_name
outfn = "-".join(outfn_split)
print(outfn)
# Make some directories based on sample id to keep output organized
sampleid = outfn_split[2]
fmaxdir = os.path.join(fluordir, sampleid)
os.makedirs(fmaxdir, exist_ok=True)
# If debug mode is 'print', create a specific debug dir for each pim file
if pcv.params.debug == 'print':
debug_outdir = os.path.join(debugdir, outfn)
if not os.path.exists(debug_outdir):
os.makedirs(debug_outdir)
pcv.params.debug_outdir = debug_outdir
# read images and create mask from max fluorescence
# read image as is. only gray values in PSII images
imgmin, _, _ = pcv.readimage(fn_min)
img, _, _ = pcv.readimage(fn_max)
fdark = np.zeros_like(img)
out_flt = fdark.astype('float32') # <- needs to be float32 for imwrite
# We always identify the leaf area using Fm and then apply the mask to subsequent frames in the induction curve for the same day and sample
if param_name == 'FvFm':
# create mask
mask = createmasks.psIImask(img)
# find objects and setup roi to designate where the plants should be
c, h = pcv.find_objects(img, mask)
roi_c, roi_h = pcv.roi.multi(img,
coord=(160, 90),
radius=30,
spacing=(160, 175),
ncols=3,
nrows=3)
#setup individual roi plant masks
newmask = np.zeros_like(mask)
# compute fv/fm and save to file
Fv, hist_fvfm = pcv.fluor_fvfm(
fdark=fdark, fmin=imgmin, fmax=img, mask=mask, bins=128)
YII = np.divide(Fv, img, out=out_flt.copy(),
where=np.logical_and(mask > 0, img > 0))
cv2.imwrite(os.path.join(fmaxdir, outfn + '_fvfm.tif'), YII)
# NPQ is 0
NPQ = np.zeros_like(YII)
# print Fm
cv2.imwrite(os.path.join(fmaxdir, outfn + '_fmax.tif'), img)
# NPQ will always be an array of 0s
else: # compute YII and NPQ if parameter is other than FvFm
#use cv2 to read image becase pcv.readimage will save as input_image.png overwriting img
newmask = cv2.imread(os.path.join(
maskdir, basefn + '-FvFm_mask.png'), -1)
# compute YII
Fvp, hist_yii = pcv.fluor_fvfm(
fdark, fmin=imgmin, fmax=img, mask=newmask, bins=128)
# make sure to initialize with out=. using where= provides random values at False pixels. you will get a strange result. newmask comes from Fm instead of Fm' so they can be different
#newmask<0, img>0 = FALSE: not part of plant but fluorescence detected.
#newmask>0, img<=0 = FALSE: part of plant in Fm but no fluorescence detected <- this is likely the culprit because pcv.apply_mask doesn't always solve issue.
YII = np.divide(Fvp, img, out=out_flt.copy(),
where=np.logical_and(newmask > 0, img > 0))
cv2.imwrite(os.path.join(fmaxdir, outfn + '_yii.tif'), YII)
# compute NPQ
Fm = cv2.imread(os.path.join(fmaxdir, basefn + '-FvFm_fmax.tif'), -1)
NPQ = np.divide(Fm, img, out=out_flt.copy(),
where=np.logical_and(newmask > 0, img > 0))
NPQ = np.subtract(NPQ, 1, out=out_flt.copy(),
where=np.logical_and(NPQ >= 1, newmask > 0))
cv2.imwrite(os.path.join(fmaxdir, outfn + '_npq.tif'), NPQ)
# end if-else Fv/Fm
# Make as many copies of incoming dataframe as there are ROIs so all results can be saved
outdf = fundf.copy()
for i in range(0, len(roi_c)-1):
outdf = outdf.append(fundf)
outdf.imageid = outdf.imageid.astype('uint8')
# Initialize lists to store variables for each ROI and iterate through each plant
frame_avg = []
yii_avg = []
yii_std = []
npq_avg = []
npq_std = []
plantarea = []
ithroi = []
inbounds = []
i = 0
rc = roi_c[i]
for i, rc in enumerate(roi_c):
# Store iteration Number
ithroi.append(int(i))
ithroi.append(int(i)) # append twice so each image has a value.
# extract ith hierarchy
rh = roi_h[i]
# Filter objects based on being in the defined ROI
# Filter objects based on being in the defined ROI
roi_obj, hierarchy_obj, submask, obj_area = pcv.roi_objects(
img,
roi_contour=rc,
roi_hierarchy=rh,
object_contour=c,
obj_hierarchy=h,
roi_type='partial')
if obj_area > 0:
# Combine multiple plant objects within an roi together
plant_contour, plant_mask = pcv.object_composition(
img=img, contours=roi_obj, hierarchy=hierarchy_obj)
#combine plant masks after roi filter
if param_name == 'FvFm':
newmask = pcv.image_add(newmask, plant_mask)
# Calc mean and std dev of fluoresence, YII, and NPQ and save to list
frame_avg.append(masked_stats.mean(imgmin, plant_mask))
frame_avg.append(masked_stats.mean(img, plant_mask))
# need double because there are two images per loop
yii_avg.append(masked_stats.mean(YII, plant_mask))
yii_avg.append(masked_stats.mean(YII, plant_mask))
yii_std.append(masked_stats.std(YII, plant_mask))
yii_std.append(masked_stats.std(YII, plant_mask))
npq_avg.append(masked_stats.mean(NPQ, plant_mask))
npq_avg.append(masked_stats.mean(NPQ, plant_mask))
npq_std.append(masked_stats.std(NPQ, plant_mask))
npq_std.append(masked_stats.std(NPQ, plant_mask))
# Check if plant is compeltely within the frame of the image
inbounds.append(pcv.within_frame(plant_mask))
inbounds.append(pcv.within_frame(plant_mask))
#Compute the plantarea in mm^2
plantarea.append(obj_area * pixelresolution**2.)
plantarea.append(obj_area * pixelresolution**2.)
else:
print('!!! No plant detected in roi ', str(i))
frame_avg.append(0)
frame_avg.append(0)
yii_avg.append(np.nan)
yii_avg.append(np.nan)
yii_std.append(np.nan)
yii_std.append(np.nan)
npq_avg.append(np.nan)
npq_avg.append(np.nan)
npq_std.append(np.nan)
npq_std.append(np.nan)
inbounds.append(np.nan)
inbounds.append(np.nan)
plantarea.append(0)
plantarea.append(0)
# end if-else
# end roi loop
# save mask of all plants to file after roi filter
if param_name == 'FvFm':
pcv.print_image(newmask, os.path.join(maskdir, outfn + '_mask.png'))
# Output a pseudocolor of NPQ and YII for each induction period for each image
imgdir = os.path.join(outdir, 'pseudocolor_images', sampleid)
os.makedirs(imgdir, exist_ok=True)
npq_img = pcv.visualize.pseudocolor(NPQ,
obj=None,
mask=newmask,
cmap='inferno',
axes=False,
min_value=0,
max_value=2.5,
background='black',
obj_padding=0)
npq_img = add_scalebar.add_scalebar(npq_img,
pixelresolution=pixelresolution,
barwidth=20,
barlocation='lower left')
# If you change the output size and resolution you will need to adjust the timelapse video script
npq_img.set_size_inches(6,6, forward=False)
npq_img.savefig(os.path.join(imgdir, outfn + '_NPQ.png'),
bbox_inches='tight',
dpi = 150)
npq_img.clf()
yii_img = pcv.visualize.pseudocolor(YII,
obj=None,
mask=newmask,
cmap=custom_colormaps.get_cmap('imagingwin'),
axes=False,
min_value=0,
max_value=1,
background='black',
obj_padding=0)
yii_img = add_scalebar.add_scalebar(yii_img,
pixelresolution=pixelresolution,
barwidth = 20,
barlocation = 'lower left')
yii_img.set_size_inches(6,6, forward=False)
yii_img.savefig(os.path.join(imgdir, outfn + '_YII.png'),
bbox_inches='tight',
dpi = 150)
yii_img.clf()
# check YII values for uniqueness between all ROI. nonunique ROI suggests the plants grew into each other and can no longer be reliably separated in image processing.
# a single value isn't always robust. I think because there ae small independent objects that fall in one roi but not the other that change the object within the roi slightly.
# also note, I originally designed this for trays of 2 pots. It will not detect if e.g. 2 out of 9 plants grow into each other
rounded_avg = [round(n, 3) for n in yii_avg]
rounded_std = [round(n, 3) for n in yii_std]
isunique = not (rounded_avg.count(rounded_avg[0]) == len(yii_avg) and
rounded_std.count(rounded_std[0]) == len(yii_std))
# save all values to outgoing dataframe
outdf['roi'] = ithroi
outdf['frame_avg'] = frame_avg
outdf['yii_avg'] = yii_avg
outdf['npq_avg'] = npq_avg
outdf['yii_std'] = yii_std
outdf['npq_std'] = npq_std
outdf['plantarea'] = plantarea
outdf['obj_in_frame'] = inbounds
outdf['unique_roi'] = isunique
return (outdf)
# end of function!
# %% Setup Debug parameters
#by default params.debug should be 'None' when you are ready to process all your images
pcv.params.debug = 'None'
# if you choose to print debug files to disk then remove the old ones first (if they exist)
if pcv.params.debug == 'print':
import shutil
shutil.rmtree(os.path.join(debugdir), ignore_errors=True)
# %% Testing dataframe
# If you need to test new function or threshold values you can subset your dataframe to analyze some images
# Comment df2= if you want to analyze all files!
# df2=df.query('(sampleid == "tray4" and (jobdate == "2019-08-10" or jobdate == "2019-08-01"))')# and (parameter == "t300_ALon" or parameter == "FvFm"))')
# del df2
# fundf = df2
# del fundf
# # # fundf
# # end testing
# %% Process the files
# check for subsetted dataframe
if 'df2' not in globals():
df2 = df
else:
print('df2 already exists!')
# Each unique combination of treatment, sampleid, jobdate, parameter should result in exactly 2 rows in the dataframe that correspond to Fo/Fm or F'/Fm'
dfgrps = df2.groupby(['treatment', 'sampleid', 'jobdate', 'parameter'])
grplist = []
for grp, grpdf in dfgrps:
# print(grp)#'%s ---' % (grp))
grplist.append(image_avg(grpdf))
df_avg = pd.concat(grplist)
# %% Add genotype information to the processing results
gtypeinfo = pd.read_csv(os.path.join(indir, 'genotype_map.csv'), skipinitialspace=True)
gtypeinfo = strip_whitespace.strip_dfwhitespace(gtypeinfo) #strip whitespace from any fields. using sep="\s*,\s" in read_csv doesn't work. first header value get messed up
df_avg2 = (pd.merge(df_avg,
gtypeinfo,
on=['treatment', 'sampleid', 'roi'],
how='inner')
)
# %% Write the tabular results to file!
(df_avg2.sort_values(['treatment', 'date', 'sampleid', 'imageid'])
.to_csv(os.path.join(outdir, 'output_psII_level0.csv'), na_rep='nan', float_format='%.4f', index=False)
)