Merge pull request #73 from iceberg-project/devel
Devel
Ioannis Paraskevakos authored Dec 21, 2020
2 parents 5f0d326 + 1adc2e8 commit aaf2c0d
Showing 7 changed files with 75 additions and 103 deletions.
33 changes: 17 additions & 16 deletions README.md
@@ -11,9 +11,9 @@
- numpy
- scipy
- pandas
-- torch==0.4.0
-- torchvision==0.2.1
-- tensorboardX==1.8
+- torch
+- torchvision
+- tensorboardX
- opencv-python
- rasterio
- affine
@@ -44,12 +44,12 @@ $ cd $SCRATCH # switch to your working space.
$ mkdir Seals # create a directory to work in.
$ cd Seals # move into your working directory.
$ module load cuda # load parallel computing architecture.
-$ module load python3 # load correct python version.
-$ virtualenv seals_env # create a virtual environment to isolate your work from the default system.
-$ source seals_env/bin/activate # activate your environment. Notice the command line prompt changes to show your environment on the next line.
+$ module load anaconda3 # load correct python version.
+$ conda create -p seals_env python=3.9 -y # create a virtual environment to isolate your work from the default system.
+$ source activate <path>/seals_env # activate your environment. Notice the command line prompt changes to show your environment on the next line.
[seals_env] $ pwd
/pylon5/group/username/Seals
-[seals_env] $ export PYTHONPATH=`pwd`/seals_env/lib/python3.5/site-packages # set a system variable to point python to your specific code.
+[seals_env] $ export PYTHONPATH=<path>/seals_env/lib/python3.9/site-packages # set a system variable to point python to your specific code. (Replace <path> with the result of the pwd command above.)
[seals_env] $ pip install iceberg_seals.search # pip is a python tool to extract the requested software (iceberg_seals.search in this case) from a repository. (this may take several minutes).
```

@@ -58,10 +58,10 @@ $ source seals_env/bin/activate # activate your environment. Notice the command line prompt changes to show your environment on the next line.
```bash
$ git clone https://github.com/iceberg-project/Seals.git
$ module load cuda
-$ module load python3
-$ virtualenv seals_env
-$ source seals_env/bin/activate
-[seals_env] $ export PYTHONPATH=<path>/seals_env/lib/python3.5/site-packages
+$ module load anaconda3 # load correct python version.
+$ conda create -p seals_env python=3.9 -y # create a virtual environment to isolate your work from the default system.
+$ source activate <path>/seals_env # activate your environment. Notice the command line prompt changes to show your environment on the next line.
+[seals_env] $ export PYTHONPATH=<path>/seals_env/lib/python3.9/site-packages # set a system variable to point python to your specific code. (Replace <path> with the result of the pwd command above.)
[seals_env] $ pip install . --upgrade
```

@@ -71,19 +71,20 @@ $ source seals_env/bin/activate
$ interact -p GPU-small --gres=gpu:p100:1 # request a compute node. This package has been tested on P100 GPUs on bridges, but that does not exclude any other resource that offers the same GPUs. (this may take a minute or two or more to receive an allocation).
$ cd $SCRATCH/Seals # make sure you are in the same directory where everything was set up before.
$ module load cuda # load parallel computing architecture, as before.
-$ module load python3 # load correct python version, as before.
-$ source seals_env/bin/activate # activate your environment, no need to create a new environment because the Seals tools are installed and isolated here.
-[iceberg_seals] $ iceberg_seals.detect --help # this will display a help screen of available usage and parameters.
+$ module load anaconda3 # load correct python version, as before.
+$ source activate <path>/seals_env # activate your environment. Notice the command line prompt changes to show your environment on the next line.
+[seals_env] $ export PYTHONPATH=<path>/seals_env/lib/python3.9/site-packages # set a system variable to point python to your specific code. (Replace <path> with the result of the pwd command above.)
+[iceberg_seals] $ iceberg_seals.predicting --help # this will display a help screen of available usage and parameters.
```
## Prediction
- Download a pre-trained model at: https://github.com/iceberg-project/Seals/tree/master/models/Heatmap-Cnt/UnetCntWRN/UnetCntWRN_ts-vanilla.tar

You can download to your local machine and use scp, ftp, rsync, or Globus to [transfer to bridges](https://portal.xsede.org/psc-bridges).
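For example, a minimal scp transfer might look like the following sketch (the hostname, username, and destination directory are placeholders, not part of this repository; check the Bridges documentation linked above for the current login node):
```bash
# Hypothetical transfer of the pre-trained model from your local machine to
# Bridges; replace the username, hostname, and destination path with your own.
$ scp UnetCntWRN_ts-vanilla.tar username@bridges.psc.edu:/pylon5/group/username/Seals/saved_models/
```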

Seals predicting is executed in two steps:
-First, follow the environment setup commands under 'To test' above. Then create tiles from an input GeoTiff image and write to the output_folder. The scale_bands parameter (in pixels) depends on the trained model being used. The default scale_bands is 299 for the pre-trained model downloaded above. If you use your own model the scale_bands may be different.
+First, follow the environment setup commands under 'To test' above. Then create tiles from an input GeoTiff image and write to the output_folder. The scale_bands parameter (in pixels) depends on the trained model being used. The default scale_bands is 224 for the pre-trained model downloaded above. If you use your own model the scale_bands may be different.
```bash
-[iceberg_seals] $ iceberg_seals.tiling --scale_bands=299 --input_image=<image_abspath> --output_folder=./test
+[iceberg_seals] $ iceberg_seals.tiling --scale_bands=224 --input_image=<image_abspath> --output_folder=./test
```
Then, detect seals on each tile and output counts and confidence for each tile.
```bash
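# The exact command was collapsed in this diff view; below is a sketch, not
# the project's documented command line. Flag names and defaults are taken
# from parse_args in predict_sealnet.py shown further down this page; the
# paths are illustrative. Note the script appends '.tar' to --model_name
# when loading the weights.
[iceberg_seals] $ iceberg_seals.predicting --input_dir=./test --model_architecture=UnetCntWRN --hyperparameter_set=B --model_name=UnetCntWRN_ts-vanilla --models_folder=saved_models --output_dir=./test_pred
```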
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-1.4
+1.5
74 changes: 21 additions & 53 deletions bin/iceberg_seals.predicting
@@ -17,71 +17,39 @@ import argparse
import os
import shutil
from iceberg_seals.search.utils.model_library import *
-from iceberg_seals.search.predicting.predict_sealnet import predict_patch
+from iceberg_seals.search.predicting.predict_sealnet import predict_patch, parse_args
warnings.filterwarnings('ignore', module='PIL')


if __name__ == "__main__":

-    parser = argparse.ArgumentParser(description='validates a CNN at the haul \
-                                     out level')
-    parser.add_argument('--input_image', type=str, required=True,
-                        help='base directory to recursively search for \
-                        validation images in')
-    parser.add_argument('--model_architecture', type=str, required=True,
-                        help='model architecture for seal detection')
-    parser.add_argument('--hyperparameter_set', type=str, required=True,
-                        help='combination of hyperparameters used for CNNs, \
-                        must be a member of hyperparameters dictionary')
-    parser.add_argument('--model_path', type=str, required=True,
-                        help='folder where the model is tarball is')
-    parser.add_argument('--training_set', type=str, required=True,
-                        help='training set where models were trained')
-    parser.add_argument('--test_folder', type=str, default='to_classify',
-                        help='folder where the model will be saved')
-    parser.add_argument('--output_folder', type=str, default='./',
-                        help='folder where results will be saved')
-    parser.add_argument('--tile', type=bool, default=False,
-                        help='Input image needs to be tiled')
-    # unroll arguments
-    args = parser.parse_args()
-    pipeline = model_archs[args.model_architecture]['pipeline']
-    input_image = args.input_image
-    output_folder = args.test_folder
-    scales = [model_archs[args.model_architecture]['input_size']]

-    # check if the image needs to be tiled.
-    if args.tile:

-        from tile_raster import tile_raster
+    args = parse_args()

-        # check for pre-existing tiles and subtiles
-        if os.path.exists('%s/tiles' % (args.test_folder)):
-            shutil.rmtree('%s/tiles' % (args.test_folder))
+    # check for invalid inputs
+    assert args.model_architecture in model_archs, "Invalid architecture -- see supported" \
+                                                   " architectures: %s" % list(model_archs.keys())

-        # tile raster into patches
-        tile_raster(input_image, output_folder, scales)
+    assert args.hyperparameter_set in hyperparameters, "Hyperparameter combination is not defined in " \
+                                                       "./utils/model_library.py"

-    # predict tiles
+    # find pipeline
+    pipeline = model_archs[args.model_architecture]['pipeline']

+    # create model instance
    model = model_defs[pipeline][args.model_architecture]

    # check for GPU support and set model to evaluation mode
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        model.cuda()
    model.eval()

-    # load saved model weights from training
-    model_name = args.model_architecture + '_ts-' + \
-                 args.training_set.split('_')[-1]
-    model.load_state_dict(
-        torch.load("%s/%s.tar" % (args.model_path, model_name)))
+    # load saved model weights from pt_train.py
+    model.load_state_dict(torch.load("%s/%s.tar" % (args.models_folder, args.model_name)))

-    predict_patch(input_image=args.input_image, model=model,
-                  input_size=model_archs[args.model_architecture]
-                  ['input_size'],
-                  batch_size=hyperparameters[args.hyperparameter_set]
-                  ['batch_size_test'],
-                  test_dir=args.test_folder,
-                  output_dir='%s' % (args.output_folder),
-                  num_workers=hyperparameters[args.hyperparameter_set]
-                  ['num_workers_train'])
+    # run validation to get confusion matrix
+    predict_patch(input_image=args.input_dir.split('/')[-1], model=model,
+                  input_size=model_archs[args.model_architecture]['input_size'],
+                  input_dir=args.input_dir, output_dir=args.output_dir,
+                  batch_size=hyperparameters[args.hyperparameter_set]['batch_size_test'],
+                  num_workers=hyperparameters[args.hyperparameter_set]['num_workers_train'],
+                  remove_tiles=True)
10 changes: 8 additions & 2 deletions bin/iceberg_seals.tiling
@@ -12,6 +12,12 @@ if __name__ == "__main__":

    # unroll arguments
    input_image = args.input_image
-    scales = [int(ele) for ele in args.scale_bands.split('_')]
    output_folder = args.output_folder
-    tile_raster(input_image, output_folder, scales, True)
+    bands = [int(ele) for ele in args.bands.split(',')]
+    patch_size = args.patch_size
+    stride = args.stride
+    geotiff = args.geotiff
+
+    # tile image
+    tile_raster(input_image, output_folder, bands, patch_size, stride, geotiff)

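For reference, a hypothetical invocation of the updated tiling entry point. The --bands, --patch_size, --stride, and --geotiff flag spellings are assumed from the args attributes read above, and the values are illustrative; note the README section above still documents the older --scale_bands interface.
```bash
# Tile band 0 of a GeoTIFF into 224x224 patches with a 224-pixel stride
# (flag names assumed from the args.* attributes in the script above).
[iceberg_seals] $ iceberg_seals.tiling --input_image=<image_abspath> --output_folder=./test --bands=0 --patch_size=224 --stride=224 --geotiff=0
```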
4 changes: 2 additions & 2 deletions setup.py
@@ -193,8 +193,8 @@ def read(fname):
        'install_requires' : ['numpy',
                              'scipy',
                              'pandas',
-                             'torch==0.4.0',
-                             'torchvision==0.2.1',
+                             'torch',
+                             'torchvision',
                              'tensorboardX==1.8',
                              'opencv-python',
                              'rasterio',
36 changes: 19 additions & 17 deletions src/iceberg_seals/search/predicting/predict_sealnet.py
@@ -36,16 +36,18 @@

def parse_args():
    parser = argparse.ArgumentParser(description='validates a CNN at the haul out level')
-    parser.add_argument('--test_dir', type=str, help='base directory to recursively search for validation images in')
-    parser.add_argument('--model_architecture', type=str, help='model architecture, must be a member of models '
-                                                                'dictionary')
-    parser.add_argument('--hyperparameter_set', type=str, help='combination of hyperparameters used, must be a member '
-                                                                'of hyperparameters dictionary')
-    parser.add_argument('--model_name', type=str, help='name of input model file from training, this name will also be '
-                                                        'used in subsequent steps of the pipeline')
+    parser.add_argument('--input_dir', type=str, help='base directory to recursively search for validation images in')
+    parser.add_argument('--model_architecture', type=str,
+                        default='UnetCntWRN', help='model architecture, must be a member of models '
+                                                   'dictionary')
+    parser.add_argument('--hyperparameter_set', type=str,
+                        default='B', help='combination of hyperparameters used, must be a member '
+                                          'of hyperparameters dictionary')
+    parser.add_argument('--model_name', type=str, default='UnetCntWRN_ts-vanilla.tar',
+                        help='name of input model file from training, this name will also be '
+                             'used in subsequent steps of the pipeline')
+    parser.add_argument('--models_folder', type=str, default='saved_models', help='folder where the model tar file is '
+                                                                                  'saved')
    parser.add_argument('--input_image', type=str, help='filename of input raster')
    parser.add_argument('--output_dir', type=str, help='folder where output shapefiles will be stored')
    return parser.parse_args()

@@ -90,14 +92,14 @@ def get_xy_locs(array, count, min_dist=3):
    return np.array([(x // cols, x % cols) for x in flat_order[:count]])


-def predict_patch(input_image, model, output_dir, test_dir, batch_size=2, input_size=299, threshold=0.5, num_workers=1,
+def predict_patch(input_image, model, output_dir, input_dir, batch_size=2, input_size=299, threshold=0.5, num_workers=1,
                  remove_tiles=False):
"""
Patch prediction function. Outputs shapefiles for counts and locations.
:param input_image: filename raster image from tile_raster
:param model: pytorch model
:param test_dir: directory with input tiles
:param input_dir: directory with input tiles
:param output_dir: output directory name
:param batch_size: number of images per mini-batch
:param input_size: size of input images
@@ -111,11 +113,11 @@ def predict_patch(input_image, model, output_dir, input_dir, batch_size=2, input_size=299, threshold=0.5, num_workers=1,
    data_transforms = transforms.Compose([
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
-        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+        transforms.Normalize([0.485], [0.229])
    ])

    # load dataset
-    dataset = ImageFolderTest(test_dir, data_transforms)
+    dataset = ImageFolderTest(input_dir, data_transforms)

    # separate into batches with dataloader
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)
@@ -190,7 +192,7 @@ def predict_patch(input_image, model, output_dir, test_dir, batch_size=2, input_size=299, threshold=0.5, num_workers=1,

    # load affine matrix
    affine_matrix = rasterio.Affine(*[ele for ele in pd.read_csv(
-        '%s/affine_matrix.csv' % (test_dir))['transform']])
+        '%s/affine_matrix.csv' % (input_dir))['transform']])

    # create geopandas DataFrames to store counts per patch and seal locations
    output_shpfile = gpd.GeoDataFrame()
@@ -241,9 +243,9 @@ def predict_patch(input_image, model, output_dir, test_dir, batch_size=2, input_size=299, threshold=0.5, num_workers=1,

    # remove tiles
    if remove_tiles:
-        shutil.rmtree('{}/tiles'.format(test_dir))
+        shutil.rmtree('{}/'.format(input_dir))

-    print('Total predicted in %s: '% os.path.basename(test_dir), sum(pred_counts['predictions']))
+    print('Total predicted in %s: ' % os.path.basename(input_dir), sum(pred_counts['predictions']))


def main():
@@ -274,9 +276,9 @@ def main():
                                                    args.model_name)))

    # run validation to get confusion matrix
-    predict_patch(input_image=args.input_image, model=model,
+    predict_patch(input_image=args.input_dir.split('/')[-1], model=model,
                  input_size=model_archs[args.model_architecture]['input_size'],
-                  test_dir=args.test_dir, output_dir=args.output_dir,
+                  input_dir=args.input_dir, output_dir=args.output_dir,
                  batch_size=hyperparameters[args.hyperparameter_set]['batch_size_test'],
                  num_workers=hyperparameters[args.hyperparameter_set]['num_workers_train'],
                  remove_tiles=True)
19 changes: 7 additions & 12 deletions src/iceberg_seals/search/utils/dataloaders/data_loader_test.py
@@ -30,17 +30,12 @@ def make_dataset(dir, extensions):
    images = []
    file_names = []
    dir = os.path.expanduser(dir)
-    for target in sorted(os.listdir(dir)):
-        d = os.path.join(dir, target)
-        if not os.path.isdir(d):
-            continue

-        for root, _, fnames in sorted(os.walk(d)):
-            for fname in sorted(fnames):
-                if has_file_allowed_extension(fname, extensions):
-                    path = os.path.join(root, fname)
-                    images.append(path)
-                    file_names.append(fname)
+    for root, _, fnames in sorted(os.walk(dir)):
+        for fname in sorted(fnames):
+            if has_file_allowed_extension(fname, extensions):
+                path = os.path.join(root, fname)
+                images.append(path)
+                file_names.append(fname)

    return [images, file_names]

@@ -159,4 +154,4 @@ class ImageFolderTest(DatasetFolder):
    def __init__(self, root, transform=None, loader=default_loader):
        super(ImageFolderTest, self).__init__(root, loader, IMG_EXTENSIONS,
                                              transform=transform)
-        self.imgs = self.samples
\ No newline at end of file
+        self.imgs = self.samples
