Skip to content

Commit

Permalink
Merge pull request #99 from Doodleverse/new_makedatasets
Browse files Browse the repository at this point in the history
New makedatasets
  • Loading branch information
ebgoldstein committed Oct 21, 2022
2 parents 1ccde41 + e89c866 commit 9b5c648
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 55 deletions.
1 change: 1 addition & 0 deletions install/gym.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dependencies:
- cython
- tensorflow-gpu
- ipython
- jupyter
- joblib
- tqdm
- pandas
Expand Down
14 changes: 3 additions & 11 deletions make_nd_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

# utility to merge multiple coincident jpeg images into nd numpy arrays
import sys,os, time, json, shutil
# sys.path.insert(1, 'src')

from skimage.io import imread, imsave
import numpy as np
Expand Down Expand Up @@ -126,7 +125,6 @@ def do_resize_image(f, TARGET_SIZE):
for k in config.keys():
exec(k+'=config["'+k+'"]')

## NCLASSES>=2
if NCLASSES>1:
pass
else:
Expand All @@ -152,7 +150,7 @@ def do_resize_image(f, TARGET_SIZE):
os.environ['CUDA_VISIBLE_DEVICES'] = str(SET_GPU)
else:
#use the first available GPU
os.environ['CUDA_VISIBLE_DEVICES'] = '0' #'1'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
else:
## to use the CPU (not recommended):
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
Expand Down Expand Up @@ -261,7 +259,7 @@ def do_resize_image(f, TARGET_SIZE):


## write padded labels to file
label_data_path = newdireclabels #label_data_path.replace('labels','padded_labels')
label_data_path = newdireclabels

label_files = natsorted(glob(label_data_path+os.sep+'*.png'))
if len(label_files)<1:
Expand Down Expand Up @@ -321,7 +319,6 @@ def do_resize_image(f, TARGET_SIZE):
if 'REMAP_CLASSES' in locals():
for k in REMAP_CLASSES.items():
lab[lab==int(k[0])] = int(k[1])
# NCLASSES = len(np.unique(lab.flatten()))
else:
lab[lab>NCLASSES]=NCLASSES

Expand Down Expand Up @@ -449,14 +446,13 @@ def read_seg_dataset_multiclass(example):
alpha=128, colormap=class_label_colormap,
color_class_offset=0, do_alpha=False)

plt.imshow(color_label, alpha=0.5)#, vmin=0, vmax=NCLASSES)
plt.imshow(color_label, alpha=0.5)

file = file.numpy()

plt.axis('off')
plt.title(file)
plt.savefig(output_data_path+os.sep+'noaug_sample'+os.sep+ ROOT_STRING + 'noaug_ex'+str(counter)+'.png', dpi=200, bbox_inches='tight')
#counter +=1
plt.close('all')
counter += 1

Expand Down Expand Up @@ -580,7 +576,6 @@ def read_seg_dataset_multiclass(example):

######################## generate and print files


i = 0
for copy in tqdm(range(AUG_COPIES)):
for k in range(AUG_LOOPS):
Expand Down Expand Up @@ -644,7 +639,6 @@ def read_seg_dataset_multiclass(example):
if FILTER_VALUE>1:

for kk in range(lstack.shape[-1]):
#l = median(lstack[:,:,kk], disk(FILTER_VALUE))
l = remove_small_objects(lstack[:,:,kk].astype('uint8')>0, np.pi*(FILTER_VALUE**2))
l = remove_small_holes(lstack[:,:,kk].astype('uint8')>0, np.pi*(FILTER_VALUE**2))
lstack[:,:,kk] = np.round(l).astype(np.uint8)
Expand Down Expand Up @@ -705,7 +699,6 @@ def read_seg_dataset_multiclass(example):
else:
plt.imshow(im)

# print(lab.shape)
lab = np.argmax(lab.numpy().squeeze(),-1)

color_label = label_to_colors(np.squeeze(lab), tf.cast(im[:,:,0]==0,tf.uint8),
Expand All @@ -724,7 +717,6 @@ def read_seg_dataset_multiclass(example):
plt.axis('off')

plt.savefig(output_data_path+os.sep+'aug_sample'+os.sep+ ROOT_STRING + 'aug_ex'+str(counter)+'.png', dpi=200, bbox_inches='tight')
#counter +=1
plt.close('all')
counter += 1

Expand Down
10 changes: 9 additions & 1 deletion seg_images_in_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@
# Load in the model from the weights which is the location of the weights file
model = tf.keras.models.load_model(weights)

M.append(model)
C.append(configfile)
T.append(MODEL)

except:
# Load the metrics mean_iou, dice_coef from doodleverse_utils
# Load in the custom loss function from doodleverse_utils
Expand Down Expand Up @@ -294,20 +298,24 @@

#look for TTA config
if not 'TESTTIMEAUG' in locals():
print("TESTTIMEAUG not found in config file(s). Setting to False")
TESTTIMEAUG = False
#look for do_crf in config
if not 'DO_CRF' in locals():
print("DO_CRF not found in config file(s). Setting to False")
DO_CRF = False
if not 'WRITE_MODELMETADATA' in locals():
print("WRITE_MODELMETADATA not found in config file(s). Setting to False")
WRITE_MODELMETADATA = False
if not 'OTSU_THRESHOLD' in locals():
print("OTSU_THRESHOLD not found in config file(s). Setting to False")
OTSU_THRESHOLD = False

# Import do_seg() from doodleverse_utils to perform the segmentation on the images
for f in tqdm(sample_filenames):
try:
do_seg(f, M, metadatadict, sample_direc,NCLASSES,N_DATA_BANDS,TARGET_SIZE,TESTTIMEAUG, WRITE_MODELMETADATA,DO_CRF,OTSU_THRESHOLD)
except:
print("{} failed".format(f))
print("{} failed. Check config file, and check the path provided contains valid imagery".format(f))


18 changes: 4 additions & 14 deletions utils/recreate_saved_model.py → utils/gen_fullmodel_from_h5.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


# Written by Dr Daniel Buscombe, Marda Science LLC
# for the USGS Coastal Change Hazards Program
#
Expand All @@ -26,7 +24,7 @@
# SOFTWARE.

import sys,os, time
sys.path.insert(1, '../src')
# sys.path.insert(1, '../src')
from tkinter import filedialog
from tkinter import *
from tkinter import messagebox
Expand Down Expand Up @@ -56,8 +54,8 @@
for k in config.keys():
exec(k+'=config["'+k+'"]')


from imports import *
from doodleverse_utils.imports import *
from doodleverse_utils.model_imports import *

#=======================================================
# Import the architectures for following models from doodleverse_utils
Expand Down Expand Up @@ -92,8 +90,6 @@
)

elif MODEL =='simple_resunet':
# num_filters = 8 # initial filters
# model = res_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_filters, NCLASSES, (KERNEL_SIZE, KERNEL_SIZE))

model = simple_resunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
kernel = (2, 2),
Expand All @@ -107,7 +103,6 @@
filters=FILTERS,#8,
num_layers=4,
strides=(1,1))
#346,564
elif MODEL=='simple_unet':
model = simple_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
kernel = (2, 2),
Expand All @@ -121,10 +116,8 @@
filters=FILTERS,#8,
num_layers=4,
strides=(1,1))
#242,812

elif MODEL=='satunet':
#model = sat_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_classes=NCLASSES)

model = custom_satunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
kernel = (2, 2),
Expand All @@ -139,9 +132,6 @@
num_layers=4,
strides=(1,1))



# model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = [mean_iou, dice_coef])
model.compile(optimizer = 'adam', loss = tf.keras.losses.CategoricalCrossentropy())

model.load_weights(weights)
Expand All @@ -150,7 +140,7 @@
# use gym to make a "_fullmodel.h5" version of the weights file, which zoo can read
model.save(weights.replace('.h5','_fullmodel.h5'))

new_model = tf.keras.models.load_model(weights.replace('.h5','_fullmodel.h5'))
# new_model = tf.keras.models.load_model(weights.replace('.h5','_fullmodel.h5'))



Expand Down
46 changes: 29 additions & 17 deletions gen_saved_model.py → utils/gen_saved_model.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,32 @@

# Written by Dr Daniel Buscombe, Marda Science LLC
# for the USGS Coastal Change Hazards Program
#
# MIT License
#
# Copyright (c) 2020-22, Marda Science LLC
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import sys,os, time
# sys.path.insert(1, 'src')
from tqdm import tqdm

USE_GPU = True

SET_GPU = '0'

## to store interim model outputs and metadata, use True
Expand All @@ -22,7 +43,6 @@
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


# from prediction_imports import *
#====================================================
from doodleverse_utils.prediction_imports import *
#---------------------------------------------------
Expand Down Expand Up @@ -81,9 +101,6 @@
)

elif MODEL =='simple_resunet':
# num_filters = 8 # initial filters
# model = res_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_filters, NCLASSES, (KERNEL_SIZE, KERNEL_SIZE))

model = simple_resunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
kernel = (2, 2),
num_classes=[NCLASSES+1 if NCLASSES==1 else NCLASSES][0],
Expand All @@ -96,7 +113,6 @@
filters=FILTERS,#8,
num_layers=4,
strides=(1,1))
#346,564
elif MODEL=='simple_unet':
model = simple_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
kernel = (2, 2),
Expand All @@ -110,21 +126,17 @@
filters=FILTERS,#8,
num_layers=4,
strides=(1,1))
#242,812

elif MODEL=='satunet':
#model = sat_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_classes=NCLASSES)

model = custom_satunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
kernel = (2, 2),
num_classes=[NCLASSES+1 if NCLASSES==1 else NCLASSES][0],
activation="relu",
use_batch_norm=True,
dropout=DROPOUT,#0.1,
dropout_change_per_layer=DROPOUT_CHANGE_PER_LAYER,#0.0,
dropout_type=DROPOUT_TYPE,#"standard",
use_dropout_on_upsampling=USE_DROPOUT_ON_UPSAMPLING,#False,
filters=FILTERS,#8,
dropout=DROPOUT,
dropout_change_per_layer=DROPOUT_CHANGE_PER_LAYER,
dropout_type=DROPOUT_TYPE,
use_dropout_on_upsampling=USE_DROPOUT_ON_UPSAMPLING,
filters=FILTERS,
num_layers=4,
strides=(1,1))

Expand Down
66 changes: 55 additions & 11 deletions utils/make_class_balanced_subset.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,67 @@
# Written by Dr Daniel Buscombe, Marda Science LLC
# for the USGS Coastal Change Hazards Program
#
# MIT License
#
# Copyright (c) 2022, Marda Science LLC
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np
from glob import glob
import os, shutil
from tqdm import tqdm
from tkinter import filedialog, messagebox
from tkinter import *

indirec = 'v4'
outdirec = 'v5_subset'

# read files
files = glob('v4/*.npz')
# Request the folder containing the class-imbalanced npz files
# indirec: full path to the directory
root = Tk()
root.filename = filedialog.askdirectory(title = "Select directory of class-imbalanced npzs")
indirec = root.filename
print(indirec)
root.withdraw()

# make directory for outputs
os.mkdir(outdirec)
os.mkdir(outdirec+os.sep+'no_use')
outdirec = os.path.normpath(os.path.dirname(indirec)+os.sep+os.path.basename(indirec)+'_subset')
print(outdirec)

# set minimum threshold for any proportion
# if any normalized class frequency distribution is less than this number
# it is discounted (moved to 'no_use')
thres = 1e-2
# thres = 1e-2
print("Input threshold for minor class [0 - 1], typically <0.25")
print("This is the smallest acceptable proportion of the minority class. Samples where minority < threshold will not be used")
print("The smaller the threshold, the fewer the number of samples used in the subset")
# print("\n")
thres = float(input())

print("Threshold chosen: {}".format(thres))

# read files
files = glob(indirec+os.sep+'*.npz')

try:
# make directory for outputs
os.mkdir(outdirec)
os.mkdir(outdirec+os.sep+'no_use')
except:
pass

# read files one by one
for file in tqdm(files):
Expand All @@ -25,12 +70,11 @@
label = np.argmax(label,-1)
# get normalized class distributions
norm_class_dist = np.bincount(label.flatten())/np.sum(label>-1)
# if length > 1, copy the file
# if below thres
if np.any(norm_class_dist<thres):
shutil.copyfile(file,file.replace(indirec,outdirec+os.sep+'no_use'))
# print('below threshold')
elif not len(norm_class_dist)==1:
shutil.copyfile(file,file.replace(indirec,outdirec))
else:
else: # if length == 1 (only one class present), copy the file to 'no_use'
shutil.copyfile(file,file.replace(indirec,outdirec+os.sep+'no_use'))
Loading

0 comments on commit 9b5c648

Please sign in to comment.