From f9a4b70d0bd7cd75c3b186c744eb47166e96e5f4 Mon Sep 17 00:00:00 2001
From: dbuscombe-usgs <dbuacombe@gmail.com>
Date: Thu, 20 Oct 2022 11:58:50 -0700
Subject: [PATCH 1/2] minor updates to utils and seg_images_folder

---
 make_nd_dataset.py                            |  2 +-
 ...aved_model.py => gen_fullmodel_from_h5.py} | 18 ++---
 utils/make_class_balanced_subset.py           | 66 +++++++++++++++----
 3 files changed, 60 insertions(+), 26 deletions(-)
 rename utils/{recreate_saved_model.py => gen_fullmodel_from_h5.py} (91%)

diff --git a/make_nd_dataset.py b/make_nd_dataset.py
index ef400a0..ce52e9b 100644
--- a/make_nd_dataset.py
+++ b/make_nd_dataset.py
@@ -648,7 +648,7 @@ def read_seg_dataset_multiclass(example):
                     l = remove_small_objects(lstack[:,:,kk].astype('uint8')>0, np.pi*(FILTER_VALUE**2))
                     l = remove_small_holes(lstack[:,:,kk].astype('uint8')>0, np.pi*(FILTER_VALUE**2))
                     lstack[:,:,kk] = np.round(l).astype(np.uint8)
-                    del l
+                    # del l
 
             datadict={}
             datadict['arr_0'] = im.astype(np.uint8)
diff --git a/utils/recreate_saved_model.py b/utils/gen_fullmodel_from_h5.py
similarity index 91%
rename from utils/recreate_saved_model.py
rename to utils/gen_fullmodel_from_h5.py
index 4b11777..666f8c1 100644
--- a/utils/recreate_saved_model.py
+++ b/utils/gen_fullmodel_from_h5.py
@@ -1,5 +1,3 @@
-
-
 # Written by Dr Daniel Buscombe, Marda Science LLC
 # for  the USGS Coastal Change Hazards Program
 #
@@ -26,7 +24,7 @@
 # SOFTWARE.
 
 import sys,os, time
-sys.path.insert(1, '../src')
+# sys.path.insert(1, '../src')
 from tkinter import filedialog
 from tkinter import *
 from tkinter import messagebox
@@ -56,8 +54,8 @@
     for k in config.keys():
         exec(k+'=config["'+k+'"]')
 
-
-    from imports import *
+    from doodleverse_utils.imports import *
+    from doodleverse_utils.model_imports import *
 
     #=======================================================
     # Import the architectures for following models from doodleverse_utils
@@ -92,8 +90,6 @@
                         )
 
     elif MODEL =='simple_resunet':
-        # num_filters = 8 # initial filters
-        # model = res_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_filters, NCLASSES, (KERNEL_SIZE, KERNEL_SIZE))
 
         model = simple_resunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
                     kernel = (2, 2),
@@ -107,7 +103,6 @@
                     filters=FILTERS,#8,
                     num_layers=4,
                     strides=(1,1))
-    #346,564
     elif MODEL=='simple_unet':
         model = simple_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
                     kernel = (2, 2),
@@ -121,10 +116,8 @@
                     filters=FILTERS,#8,
                     num_layers=4,
                     strides=(1,1))
-    #242,812
 
     elif MODEL=='satunet':
-        #model = sat_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_classes=NCLASSES)
 
         model = custom_satunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
                     kernel = (2, 2),
@@ -139,9 +132,6 @@
                     num_layers=4,
                     strides=(1,1))
 
-
-
-    # model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = [mean_iou, dice_coef])
     model.compile(optimizer = 'adam', loss = tf.keras.losses.CategoricalCrossentropy())
 
     model.load_weights(weights)
@@ -150,7 +140,7 @@
     # use gym  make"fullmodel.h5" version which zoo can read "fullmodel.h5" 
     model.save(weights.replace('.h5','_fullmodel.h5'))
 
-    new_model = tf.keras.models.load_model(weights.replace('.h5','_fullmodel.h5'))
+    # new_model = tf.keras.models.load_model(weights.replace('.h5','_fullmodel.h5'))
 
 
 
diff --git a/utils/make_class_balanced_subset.py b/utils/make_class_balanced_subset.py
index c7ebe78..53f0c13 100644
--- a/utils/make_class_balanced_subset.py
+++ b/utils/make_class_balanced_subset.py
@@ -1,22 +1,67 @@
+# Written by Dr Daniel Buscombe, Marda Science LLC
+# for the USGS Coastal Change Hazards Program
+#
+# MIT License
+#
+# Copyright (c) 2022, Marda Science LLC
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
 import numpy as np 
 from glob import glob 
 import os, shutil
 from tqdm import tqdm
+from tkinter import filedialog, messagebox
+from tkinter import *
 
-indirec = 'v4'
-outdirec = 'v5_subset'
-
-# read files
-files = glob('v4/*.npz')
+# Request the folder containing the imagery/npz to segment 
+# sample_direc: full path to the directory
+root = Tk()
+root.filename =  filedialog.askdirectory(title = "Select directory of class-imbalanced npzs")
+indirec = root.filename
+print(indirec)
+root.withdraw()
 
-# make directory for outputs
-os.mkdir(outdirec)
-os.mkdir(outdirec+os.sep+'no_use')
+outdirec = os.path.normpath(os.path.dirname(indirec)+os.sep+os.path.basename(indirec)+'_subset')
+print(outdirec)
 
 # set minimum threshold for any proportion
 # if any normalized class frequency distribution is less than this number
 # it is discounted (moved to 'no_use')
-thres = 1e-2
+# thres = 1e-2
+print("Input threshold for minor class [0 - 1], typically <0.25")
+print("This is the smallest acceptable proportion of the minority class. Samples where minority < threshold will not be used")
+print("The larger the threshold, the fewer the number of samples used in the subset")
+# (any file with a class proportion below thres is routed to 'no_use')
+thres = float(input())
+
+print("Threshold chosen: {}".format(thres))
+
+# read files
+files = glob(indirec+os.sep+'*.npz')
+
+try:
+    # make directories for outputs; makedirs also creates outdirec as the
+    # parent of 'no_use', and exist_ok=True tolerates re-running
+    os.makedirs(outdirec+os.sep+'no_use', exist_ok=True)
+except OSError as e:
+    raise SystemExit("Could not create output folder {}: {}".format(outdirec, e))
 
 # read files one by one
 for file in tqdm(files):
@@ -25,12 +70,11 @@
         label = np.argmax(label,-1)
         # get normalized class distributions
         norm_class_dist = np.bincount(label.flatten())/np.sum(label>-1)
-        # if length > 1, copy the file
         # if below thres
         if np.any(norm_class_dist<thres):
             shutil.copyfile(file,file.replace(indirec,outdirec+os.sep+'no_use'))
             # print('below threshold')
         elif not len(norm_class_dist)==1:
             shutil.copyfile(file,file.replace(indirec,outdirec))            
-        else:
+        else: # only one class present (length == 1): copy to 'no_use'
             shutil.copyfile(file,file.replace(indirec,outdirec+os.sep+'no_use'))

From dde0152d3d6ab570b16d41ea376499ac0a791044 Mon Sep 17 00:00:00 2001
From: dbuscombe-usgs <dbuacombe@gmail.com>
Date: Thu, 20 Oct 2022 13:54:08 -0700
Subject: [PATCH 2/2] minor bug fixes and code cleans +update vanilla config,
 conda yml

---
 install/gym.yml                               |  1 +
 make_nd_dataset.py                            | 14 ++----
 seg_images_in_folder.py                       | 10 +++-
 .../gen_saved_model.py                        | 46 ++++++++++++-------
 .../vanilla_unet_ronneburger2015.json         | 11 ++++-
 5 files changed, 52 insertions(+), 30 deletions(-)
 rename gen_saved_model.py => utils/gen_saved_model.py (77%)

diff --git a/install/gym.yml b/install/gym.yml
index bc5b496..b8a9580 100644
--- a/install/gym.yml
+++ b/install/gym.yml
@@ -10,6 +10,7 @@ dependencies:
  - cython
  - tensorflow-gpu
  - ipython
+ - jupyter
  - joblib
  - tqdm
  - pandas
diff --git a/make_nd_dataset.py b/make_nd_dataset.py
index ce52e9b..43e36f9 100644
--- a/make_nd_dataset.py
+++ b/make_nd_dataset.py
@@ -25,7 +25,6 @@
 
 # utility to merge multiple coincident jpeg images into nd numpy arrays
 import sys,os, time, json, shutil
-# sys.path.insert(1, 'src')
 
 from skimage.io import imread, imsave
 import numpy as np
@@ -126,7 +125,6 @@ def do_resize_image(f, TARGET_SIZE):
 for k in config.keys():
     exec(k+'=config["'+k+'"]')
 
-## NCLASSES>=2
 if NCLASSES>1:
     pass
 else:
@@ -152,7 +150,7 @@ def do_resize_image(f, TARGET_SIZE):
         os.environ['CUDA_VISIBLE_DEVICES'] = str(SET_GPU)
     else:
         #use the first available GPU
-        os.environ['CUDA_VISIBLE_DEVICES'] = '0' #'1'
+        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 else:
    ## to use the CPU (not recommended):
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
@@ -261,7 +259,7 @@ def do_resize_image(f, TARGET_SIZE):
 
 
 ## write padded labels to file
-label_data_path = newdireclabels #label_data_path.replace('labels','padded_labels')
+label_data_path = newdireclabels 
 
 label_files = natsorted(glob(label_data_path+os.sep+'*.png'))
 if len(label_files)<1:
@@ -321,7 +319,6 @@ def do_resize_image(f, TARGET_SIZE):
     if 'REMAP_CLASSES' in locals():
         for k in REMAP_CLASSES.items():
             lab[lab==int(k[0])] = int(k[1])
-        # NCLASSES = len(np.unique(lab.flatten()))
     else:
         lab[lab>NCLASSES]=NCLASSES
 
@@ -449,14 +446,13 @@ def read_seg_dataset_multiclass(example):
                                     alpha=128, colormap=class_label_colormap,
                                      color_class_offset=0, do_alpha=False)
 
-     plt.imshow(color_label,  alpha=0.5)#, vmin=0, vmax=NCLASSES)
+     plt.imshow(color_label,  alpha=0.5)
 
      file = file.numpy()
 
      plt.axis('off')
      plt.title(file)
      plt.savefig(output_data_path+os.sep+'noaug_sample'+os.sep+ ROOT_STRING + 'noaug_ex'+str(counter)+'.png', dpi=200, bbox_inches='tight')
-     #counter +=1
      plt.close('all')
      counter += 1
 
@@ -580,7 +576,6 @@ def read_seg_dataset_multiclass(example):
 
 ######################## generate and print files
 
-
 i = 0
 for copy in tqdm(range(AUG_COPIES)):
     for k in range(AUG_LOOPS):
@@ -644,7 +639,6 @@ def read_seg_dataset_multiclass(example):
             if FILTER_VALUE>1:
 
                 for kk in range(lstack.shape[-1]):
-                    #l = median(lstack[:,:,kk], disk(FILTER_VALUE))
                     l = remove_small_objects(lstack[:,:,kk].astype('uint8')>0, np.pi*(FILTER_VALUE**2))
                     l = remove_small_holes(lstack[:,:,kk].astype('uint8')>0, np.pi*(FILTER_VALUE**2))
                     lstack[:,:,kk] = np.round(l).astype(np.uint8)
@@ -705,7 +699,6 @@ def read_seg_dataset_multiclass(example):
      else:
          plt.imshow(im)
 
-     # print(lab.shape)
      lab = np.argmax(lab.numpy().squeeze(),-1)
 
      color_label = label_to_colors(np.squeeze(lab), tf.cast(im[:,:,0]==0,tf.uint8),
@@ -724,7 +717,6 @@ def read_seg_dataset_multiclass(example):
      plt.axis('off')
 
      plt.savefig(output_data_path+os.sep+'aug_sample'+os.sep+ ROOT_STRING + 'aug_ex'+str(counter)+'.png', dpi=200, bbox_inches='tight')
-     #counter +=1
      plt.close('all')
      counter += 1
 
diff --git a/seg_images_in_folder.py b/seg_images_in_folder.py
index a5499e0..a4ee232 100644
--- a/seg_images_in_folder.py
+++ b/seg_images_in_folder.py
@@ -253,6 +253,10 @@
         # Load in the model from the weights which is the location of the weights file        
         model = tf.keras.models.load_model(weights)
 
+        M.append(model)
+        C.append(configfile)
+        T.append(MODEL)
+        
     except:
         # Load the metrics mean_iou, dice_coef from doodleverse_utils
         # Load in the custom loss function from doodleverse_utils        
@@ -294,13 +298,17 @@
 
 #look for TTA config
 if not 'TESTTIMEAUG' in locals():
+    print("TESTTIMEAUG not found in config file(s). Setting to False")
     TESTTIMEAUG = False
 #look for do_crf in config
 if not 'DO_CRF' in locals():
+    print("DO_CRF not found in config file(s). Setting to False")
     DO_CRF = False
 if not 'WRITE_MODELMETADATA' in locals():
+    print("WRITE_MODELMETADATA not found in config file(s). Setting to False")
     WRITE_MODELMETADATA = False
 if not 'OTSU_THRESHOLD' in locals():
+    print("OTSU_THRESHOLD not found in config file(s). Setting to False")
     OTSU_THRESHOLD = False
 
 # Import do_seg() from doodleverse_utils to perform the segmentation on the images
@@ -308,6 +316,6 @@
     try:
         do_seg(f, M, metadatadict, sample_direc,NCLASSES,N_DATA_BANDS,TARGET_SIZE,TESTTIMEAUG, WRITE_MODELMETADATA,DO_CRF,OTSU_THRESHOLD)
     except:
-        print("{} failed".format(f))
+        print("{} failed. Check config file, and check the path provided contains valid imagery".format(f))
 
 
diff --git a/gen_saved_model.py b/utils/gen_saved_model.py
similarity index 77%
rename from gen_saved_model.py
rename to utils/gen_saved_model.py
index 1795437..2979683 100644
--- a/gen_saved_model.py
+++ b/utils/gen_saved_model.py
@@ -1,11 +1,32 @@
-
+# Written by Dr Daniel Buscombe, Marda Science LLC
+# for the USGS Coastal Change Hazards Program
+#
+# MIT License
+#
+# Copyright (c) 2020-22, Marda Science LLC
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
 
 import sys,os, time
-# sys.path.insert(1, 'src')
 from tqdm import tqdm
 
 USE_GPU = True
-
 SET_GPU = '0'
 
 ## to store interim model outputs and metadata, use True
@@ -22,7 +43,6 @@
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 
 
-# from prediction_imports import *
 #====================================================
 from doodleverse_utils.prediction_imports import *
 #---------------------------------------------------
@@ -81,9 +101,6 @@
                     )
 
 elif MODEL =='simple_resunet':
-    # num_filters = 8 # initial filters
-    # model = res_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_filters, NCLASSES, (KERNEL_SIZE, KERNEL_SIZE))
-
     model = simple_resunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
                 kernel = (2, 2),
                 num_classes=[NCLASSES+1 if NCLASSES==1 else NCLASSES][0],
@@ -96,7 +113,6 @@
                 filters=FILTERS,#8,
                 num_layers=4,
                 strides=(1,1))
-#346,564
 elif MODEL=='simple_unet':
     model = simple_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
                 kernel = (2, 2),
@@ -110,21 +126,17 @@
                 filters=FILTERS,#8,
                 num_layers=4,
                 strides=(1,1))
-#242,812
-
 elif MODEL=='satunet':
-    #model = sat_unet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS), num_classes=NCLASSES)
-
     model = custom_satunet((TARGET_SIZE[0], TARGET_SIZE[1], N_DATA_BANDS),
                 kernel = (2, 2),
                 num_classes=[NCLASSES+1 if NCLASSES==1 else NCLASSES][0],
                 activation="relu",
                 use_batch_norm=True,
-                dropout=DROPOUT,#0.1,
-                dropout_change_per_layer=DROPOUT_CHANGE_PER_LAYER,#0.0,
-                dropout_type=DROPOUT_TYPE,#"standard",
-                use_dropout_on_upsampling=USE_DROPOUT_ON_UPSAMPLING,#False,
-                filters=FILTERS,#8,
+                dropout=DROPOUT,
+                dropout_change_per_layer=DROPOUT_CHANGE_PER_LAYER,
+                dropout_type=DROPOUT_TYPE,
+                use_dropout_on_upsampling=USE_DROPOUT_ON_UPSAMPLING,
+                filters=FILTERS,
                 num_layers=4,
                 strides=(1,1))
 
diff --git a/vanilla_unet_config/vanilla_unet_ronneburger2015.json b/vanilla_unet_config/vanilla_unet_ronneburger2015.json
index 8610664..8c98c70 100644
--- a/vanilla_unet_config/vanilla_unet_ronneburger2015.json
+++ b/vanilla_unet_config/vanilla_unet_ronneburger2015.json
@@ -33,5 +33,14 @@
   "AUG_HFLIP": false,
   "AUG_VFLIP": false,
   "AUG_LOOPS": 10,
-  "AUG_COPIES": 3
+  "AUG_COPIES": 3,
+  "SET_GPU": "0",
+  "WRITE_MODELMETADATA": false,
+  "DO_CRF": false,
+  "LOSS_WEIGHTS": false,
+  "MODE": "all",
+  "SET_PCI_BUS_ID": true,
+  "TESTTIMEAUG": true,
+  "WRITE_MODELMETADATA": true,
+  "OTSU_THRESHOLD": true      
 }