From f9a4b70d0bd7cd75c3b186c744eb47166e96e5f4 Mon Sep 17 00:00:00 2001 From: dbuscombe-usgs Date: Thu, 20 Oct 2022 11:58:50 -0700 Subject: [PATCH 1/2] minor updates to utils and seg_images_folder --- make_nd_dataset.py | 2 +- ...aved_model.py => gen_fullmodel_from_h5.py} | 18 ++--- utils/make_class_balanced_subset.py | 66 +++++++++++++++---- 3 files changed, 60 insertions(+), 26 deletions(-) rename utils/{recreate_saved_model.py => gen_fullmodel_from_h5.py} (91%) diff --git a/make_nd_dataset.py b/make_nd_dataset.py index ef400a0..ce52e9b 100644 --- a/make_nd_dataset.py +++ b/make_nd_dataset.py @@ -648,7 +648,7 @@ def read_seg_dataset_multiclass(example): l = remove_small_objects(lstack[:,:,kk].astype('uint8')>0, np.pi*(FILTER_VALUE**2)) l = remove_small_holes(lstack[:,:,kk].astype('uint8')>0, np.pi*(FILTER_VALUE**2)) lstack[:,:,kk] = np.round(l).astype(np.uint8) - del l + # del l datadict={} datadict['arr_0'] = im.astype(np.uint8) diff --git a/utils/recreate_saved_model.py b/utils/gen_fullmodel_from_h5.py similarity index 91% rename from utils/recreate_saved_model.py rename to utils/gen_fullmodel_from_h5.py index 4b11777..666f8c1 100644 --- a/utils/recreate_saved_model.py +++ b/utils/gen_fullmodel_from_h5.py @@ -1,5 +1,3 @@ - - # Written by Dr Daniel Buscombe, Marda Science LLC # for the USGS Coastal Change Hazards Program # @@ -26,7 +24,7 @@ # SOFTWARE. import sys,os, time -sys.path.insert(1, '../src') +# sys.path.insert(1, '../src') from tkinter import filedialog from tkinter import * from tkinter import messagebox @@ -56,8 +54,8 @@ for k in config.keys(): exec(k+'=config["'+k+'"]') - - from imports import * + from doodleverse_utils.imports import * + from doodleverse_utils.model_imports import * #======================================================= # Import the architectures for following models from doodleverse_utils @@ -92,8 +90,6 @@ ) elif MODEL =='simple_resunet': - # num_filters = 8 # initial filters - # model = res_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_filters, NCLASSES, (KERNEL_SIZE, KERNEL_SIZE)) model = simple_resunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), kernel = (2, 2), @@ -107,7 +103,6 @@ filters=FILTERS,#8, num_layers=4, strides=(1,1)) - #346,564 elif MODEL=='simple_unet': model = simple_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), kernel = (2, 2), @@ -121,10 +116,8 @@ filters=FILTERS,#8, num_layers=4, strides=(1,1)) - #242,812 elif MODEL=='satunet': - #model = sat_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_classes=NCLASSES) model = custom_satunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), kernel = (2, 2), @@ -139,9 +132,6 @@ num_layers=4, strides=(1,1)) - - - # model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = [mean_iou, dice_coef]) model.compile(optimizer = 'adam', loss = tf.keras.losses.CategoricalCrossentropy()) model.load_weights(weights) @@ -150,7 +140,7 @@ # use gym make"fullmodel.h5" version which zoo can read "fullmodel.h5" model.save(weights.replace('.h5','_fullmodel.h5')) - new_model = tf.keras.models.load_model(weights.replace('.h5','_fullmodel.h5')) + # new_model = tf.keras.models.load_model(weights.replace('.h5','_fullmodel.h5')) diff --git a/utils/make_class_balanced_subset.py b/utils/make_class_balanced_subset.py index c7ebe78..53f0c13 100644 --- a/utils/make_class_balanced_subset.py +++ b/utils/make_class_balanced_subset.py @@ -1,22 +1,67 @@ +# Written by Dr Daniel Buscombe, Marda Science LLC +# for the USGS Coastal Change Hazards Program +# +# MIT License +# +# Copyright (c) 2022, Marda Science LLC +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + import numpy as np from glob import glob import os, shutil from tqdm import tqdm +from tkinter import filedialog, messagebox +from tkinter import * -indirec = 'v4' -outdirec = 'v5_subset' - -# read files -files = glob('v4/*.npz') +# Request the folder containing the imagery/npz to segment +# sample_direc: full path to the directory +root = Tk() +root.filename = filedialog.askdirectory(title = "Select directory of class-imbalanced npzs") +indirec = root.filename +print(indirec) +root.withdraw() -# make directory for outputs -os.mkdir(outdirec) -os.mkdir(outdirec+os.sep+'no_use') +outdirec = os.path.normpath(os.path.dirname(indirec)+os.sep+os.path.basename(indirec)+'_subset') +print(outdirec) # set minimum threshold for any proportion # if any normalized class frequency distribution is less than this number # it is discounted (moved to 'no_use') -thres = 1e-2 +# thres = 1e-2 +print("Input threshold for minor class [0 - 1], typically <0.25") +print("This is the smallest acceptable proportion of the minority class. Samples were minority < threshold will not be used") +print("The smaller the threshold, the fewer the number of samples used in the subset") +# print("\n") +thres = float(input()) + +print("Threshold chosen: {}".format(thres)) + +# read files +files = glob(indirec+os.sep+'*.npz') + +try: + # make directory for outputs + os.mkdir(outdirec) + os.mkdir(outdirec+os.sep+'no_use') +except: + pass # read files one by one for file in tqdm(files): @@ -25,12 +70,11 @@ label = np.argmax(label,-1) # get normalized class distributions norm_class_dist = np.bincount(label.flatten())/np.sum(label>-1) - # if length > 1, copy the file # if below thres if np.any(norm_class_dist 1, copy the file shutil.copyfile(file,file.replace(indirec,outdirec+os.sep+'no_use')) From dde0152d3d6ab570b16d41ea376499ac0a791044 Mon Sep 17 00:00:00 2001 From: dbuscombe-usgs Date: Thu, 20 Oct 2022 13:54:08 -0700 Subject: [PATCH 2/2] minor bug fixes and code cleans +update vanilla config, conda yml --- install/gym.yml | 1 + make_nd_dataset.py | 14 ++---- seg_images_in_folder.py | 10 +++- .../gen_saved_model.py | 46 ++++++++++++------- .../vanilla_unet_ronneburger2015.json | 11 ++++- 5 files changed, 52 insertions(+), 30 deletions(-) rename gen_saved_model.py => utils/gen_saved_model.py (77%) diff --git a/install/gym.yml b/install/gym.yml index bc5b496..b8a9580 100644 --- a/install/gym.yml +++ b/install/gym.yml @@ -10,6 +10,7 @@ dependencies: - cython - tensorflow-gpu - ipython + - jupyter - joblib - tqdm - pandas diff --git a/make_nd_dataset.py b/make_nd_dataset.py index ce52e9b..43e36f9 100644 --- a/make_nd_dataset.py +++ b/make_nd_dataset.py @@ -25,7 +25,6 @@ # utility to merge multiple coincident jpeg images into nd numpy arrays import sys,os, time, json, shutil -# sys.path.insert(1, 'src') from skimage.io import imread, imsave import numpy as np @@ -126,7 +125,6 @@ def do_resize_image(f, TARGET_SIZE): for k in config.keys(): exec(k+'=config["'+k+'"]') -## NCLASSES>=2 if NCLASSES>1: pass else: @@ -152,7 +150,7 @@ def do_resize_image(f, TARGET_SIZE): os.environ['CUDA_VISIBLE_DEVICES'] = str(SET_GPU) else: #use the first available GPU - os.environ['CUDA_VISIBLE_DEVICES'] = '0' #'1' + os.environ['CUDA_VISIBLE_DEVICES'] = '0' else: ## to use the CPU (not recommended): os.environ['CUDA_VISIBLE_DEVICES'] = '-1' @@ -261,7 +259,7 @@ def do_resize_image(f, TARGET_SIZE): ## write padded labels to file -label_data_path = newdireclabels #label_data_path.replace('labels','padded_labels') +label_data_path = newdireclabels label_files = natsorted(glob(label_data_path+os.sep+'*.png')) if len(label_files)<1: @@ -321,7 +319,6 @@ def do_resize_image(f, TARGET_SIZE): if 'REMAP_CLASSES' in locals(): for k in REMAP_CLASSES.items(): lab[lab==int(k[0])] = int(k[1]) - # NCLASSES = len(np.unique(lab.flatten())) else: lab[lab>NCLASSES]=NCLASSES @@ -449,14 +446,13 @@ def read_seg_dataset_multiclass(example): alpha=128, colormap=class_label_colormap, color_class_offset=0, do_alpha=False) - plt.imshow(color_label, alpha=0.5)#, vmin=0, vmax=NCLASSES) + plt.imshow(color_label, alpha=0.5) file = file.numpy() plt.axis('off') plt.title(file) plt.savefig(output_data_path+os.sep+'noaug_sample'+os.sep+ ROOT_STRING + 'noaug_ex'+str(counter)+'.png', dpi=200, bbox_inches='tight') - #counter +=1 plt.close('all') counter += 1 @@ -580,7 +576,6 @@ def read_seg_dataset_multiclass(example): ######################## generate and print files - i = 0 for copy in tqdm(range(AUG_COPIES)): for k in range(AUG_LOOPS): @@ -644,7 +639,6 @@ def read_seg_dataset_multiclass(example): if FILTER_VALUE>1: for kk in range(lstack.shape[-1]): - #l = median(lstack[:,:,kk], disk(FILTER_VALUE)) l = remove_small_objects(lstack[:,:,kk].astype('uint8')>0, np.pi*(FILTER_VALUE**2)) l = remove_small_holes(lstack[:,:,kk].astype('uint8')>0, np.pi*(FILTER_VALUE**2)) lstack[:,:,kk] = np.round(l).astype(np.uint8) @@ -705,7 +699,6 @@ def read_seg_dataset_multiclass(example): else: plt.imshow(im) - # print(lab.shape) lab = np.argmax(lab.numpy().squeeze(),-1) color_label = label_to_colors(np.squeeze(lab), tf.cast(im[:,:,0]==0,tf.uint8), @@ -724,7 +717,6 @@ def read_seg_dataset_multiclass(example): plt.axis('off') plt.savefig(output_data_path+os.sep+'aug_sample'+os.sep+ ROOT_STRING + 'aug_ex'+str(counter)+'.png', dpi=200, bbox_inches='tight') - #counter +=1 plt.close('all') counter += 1 diff --git a/seg_images_in_folder.py b/seg_images_in_folder.py index a5499e0..a4ee232 100644 --- a/seg_images_in_folder.py +++ b/seg_images_in_folder.py @@ -253,6 +253,10 @@ # Load in the model from the weights which is the location of the weights file model = tf.keras.models.load_model(weights) + M.append(model) + C.append(configfile) + T.append(MODEL) + except: # Load the metrics mean_iou, dice_coef from doodleverse_utils # Load in the custom loss function from doodleverse_utils @@ -294,13 +298,17 @@ #look for TTA config if not 'TESTTIMEAUG' in locals(): + print("TESTTIMEAUG not found in config file(s). Setting to False") TESTTIMEAUG = False #look for do_crf in config if not 'DO_CRF' in locals(): + print("TESTTIMEAUG not found in config file(s). Setting to False") DO_CRF = False if not 'WRITE_MODELMETADATA' in locals(): + print("WRITE_MODELMETADATA not found in config file(s). Setting to False") WRITE_MODELMETADATA = False if not 'OTSU_THRESHOLD' in locals(): + print("OTSU_THRESHOLD not found in config file(s). Setting to False") OTSU_THRESHOLD = False # Import do_seg() from doodleverse_utils to perform the segmentation on the images @@ -308,6 +316,6 @@ try: do_seg(f, M, metadatadict, sample_direc,NCLASSES,N_DATA_BANDS,TARGET_SIZE,TESTTIMEAUG, WRITE_MODELMETADATA,DO_CRF,OTSU_THRESHOLD) except: - print("{} failed".format(f)) + print("{} failed. Check config file, and check the path provided contains valid imagery".format(f)) diff --git a/gen_saved_model.py b/utils/gen_saved_model.py similarity index 77% rename from gen_saved_model.py rename to utils/gen_saved_model.py index 1795437..2979683 100644 --- a/gen_saved_model.py +++ b/utils/gen_saved_model.py @@ -1,11 +1,32 @@ - +# Written by Dr Daniel Buscombe, Marda Science LLC +# for the USGS Coastal Change Hazards Program +# +# MIT License +# +# Copyright (c) 2020-22, Marda Science LLC +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. import sys,os, time -# sys.path.insert(1, 'src') from tqdm import tqdm USE_GPU = True - SET_GPU = '0' ## to store interim model outputs and metadata, use True @@ -22,7 +43,6 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' -# from prediction_imports import * #==================================================== from doodleverse_utils.prediction_imports import * #--------------------------------------------------- @@ -81,9 +101,6 @@ ) elif MODEL =='simple_resunet': - # num_filters = 8 # initial filters - # model = res_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_filters, NCLASSES, (KERNEL_SIZE, KERNEL_SIZE)) - model = simple_resunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), kernel = (2, 2), num_classes=[NCLASSES+1 if NCLASSES==1 else NCLASSES][0], @@ -96,7 +113,6 @@ filters=FILTERS,#8, num_layers=4, strides=(1,1)) -#346,564 elif MODEL=='simple_unet': model = simple_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), kernel = (2, 2), @@ -110,21 +126,17 @@ filters=FILTERS,#8, num_layers=4, strides=(1,1)) -#242,812 - elif MODEL=='satunet': - #model = sat_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_classes=NCLASSES) - model = custom_satunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), kernel = (2, 2), num_classes=[NCLASSES+1 if NCLASSES==1 else NCLASSES][0], activation="relu", use_batch_norm=True, - dropout=DROPOUT,#0.1, - dropout_change_per_layer=DROPOUT_CHANGE_PER_LAYER,#0.0, - dropout_type=DROPOUT_TYPE,#"standard", - use_dropout_on_upsampling=USE_DROPOUT_ON_UPSAMPLING,#False, - filters=FILTERS,#8, + dropout=DROPOUT, + dropout_change_per_layer=DROPOUT_CHANGE_PER_LAYER, + dropout_type=DROPOUT_TYPE, + use_dropout_on_upsampling=USE_DROPOUT_ON_UPSAMPLING, + filters=FILTERS, num_layers=4, strides=(1,1)) diff --git a/vanilla_unet_config/vanilla_unet_ronneburger2015.json b/vanilla_unet_config/vanilla_unet_ronneburger2015.json index 8610664..8c98c70 100644 --- a/vanilla_unet_config/vanilla_unet_ronneburger2015.json +++ b/vanilla_unet_config/vanilla_unet_ronneburger2015.json @@ -33,5 +33,14 @@ "AUG_HFLIP": false, "AUG_VFLIP": false, "AUG_LOOPS": 10, - "AUG_COPIES": 3 + "AUG_COPIES": 3, + "SET_GPU": "0", + "WRITE_MODELMETADATA": false, + "DO_CRF": false, + "LOSS_WEIGHTS": false, + "MODE": "all", + "SET_PCI_BUS_ID": true, + "TESTTIMEAUG": true, + "WRITE_MODELMETADATA": true, + "OTSU_THRESHOLD": true }