2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
# Multi-Scale Atrous Spatial Pyramid Pooling for Larger and Denser receptive field in Semantic Segmentation
`# Multi-Scale Atrous Spatial Pyramid Pooling for Larger and Denser receptive field in Semantic Segmentation

## Introduction
This repository extends the code of the paper "DenseASPP for Semantic Segmentation in Street Scenes". The paper's authors released only the proposed model code, without training code, so our team implemented the training pipeline. Beyond a plain training implementation, we also propose our own contribution.
1 change: 0 additions & 1 deletion denseASPP.py
@@ -10,7 +10,6 @@ class DenseASPP(nn.Module):
* output_scale can only set as 8 or 16
"""
def __init__(self, args, model_cfg, n_class=19, output_stride=8):
# def __init__(self, model_cfg, n_class=19, output_stride=8):
super(DenseASPP, self).__init__()
# bn_size = model_cfg['bn_size']
bn_size = args.batch_size
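Review note on `bn_size = args.batch_size`: in DenseNet-style model configs, `bn_size` is the bottleneck width multiplier of each dense layer (typically 4), not the batch size, so this change couples the architecture width to the training batch size. A minimal alternative sketch, assuming `model_cfg` still carries the original key:

```python
# bn_size is the DenseNet bottleneck multiplier (usually 4); keeping it in the
# model config leaves the architecture independent of the training batch size.
bn_size = model_cfg.get('bn_size', 4)
```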
53 changes: 45 additions & 8 deletions miscellaneous/misc.py
@@ -1,14 +1,14 @@
import os
from math import ceil

import torch.nn.functional as F
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.autograd import Variable
import math
import os

from sklearn.metrics import confusion_matrix
import math
from torch.autograd import Variable
from tqdm import tqdm
from math import ceil
from torch import nn

def check_mkdir(dir_name):
if not os.path.exists(dir_name):
@@ -404,4 +404,41 @@ def wrapper(self, x):
outputs_all_scales += outputs
return outputs_all_scales

return wrapper
return wrapper

def evaluate_model(args, val_dataloader, model, criterion):
eval_miou_sum = 0
# sum_iou_class = 0
eval_loss_sum = 0
for sampled_eval in tqdm(val_dataloader.data):
with torch.no_grad():
eval_image = sampled_eval['image'].cuda(args.gpu, non_blocking=True)
eval_gt = sampled_eval['gt'].cuda(args.gpu, non_blocking=True)

eval_output = model(eval_image)
model.eval()
eval_loss = criterion(eval_output, eval_gt)

eval_output = F.softmax(eval_output, dim=1)
eval_output = torch.argmax(eval_output, dim=1)
eval_output = eval_output.contiguous().view(-1)
eval_gt = eval_gt.contiguous().view(-1)

iou_per_class = []
for num_class in range(len(val_dataloader.class_names)):
true_class = (eval_output == num_class)
true_label = (eval_gt == num_class)
if true_label.long().sum().item() == 0:
iou_per_class.append(np.nan)
else:
intersect = torch.logical_and(true_class, true_label).sum().float().item()
union = torch.logical_or(true_class, true_label).sum().float().item()

iou = (intersect + 1e-10) / (union + 1e-10)
iou_per_class.append(iou)

eval_miou_sum += np.nanmean(iou_per_class)
# sum_iou_class += sum(iou_per_class)
eval_loss_sum += eval_loss

return eval_loss_sum, eval_miou_sum
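Review note on `evaluate_model`: `model.eval()` is invoked only after the first forward pass inside the loop, and `eval_loss_sum += eval_loss` accumulates a CUDA tensor rather than a Python scalar. A minimal sketch of the same loop with eval mode set up front and scalar accumulation, assuming the `val_dataloader.data` and sample-dict interface used in this PR:

```python
import numpy as np
import torch
from tqdm import tqdm

def evaluate_model_sketch(args, val_dataloader, model, criterion):
    model.eval()  # disable dropout / batch-norm updates before any forward pass
    eval_loss_sum, eval_miou_sum = 0.0, 0.0
    with torch.no_grad():
        for sample in tqdm(val_dataloader.data):
            image = sample['image'].cuda(args.gpu, non_blocking=True)
            gt = sample['gt'].cuda(args.gpu, non_blocking=True)

            output = model(image)
            eval_loss_sum += criterion(output, gt).item()  # .item() drops tensor refs

            pred = output.argmax(dim=1).view(-1)  # softmax is monotonic; argmax suffices
            gt_flat = gt.view(-1)

            iou_per_class = []
            for c in range(len(val_dataloader.class_names)):
                pred_c, gt_c = pred == c, gt_flat == c
                if gt_c.sum().item() == 0:
                    iou_per_class.append(np.nan)  # class absent in this batch
                else:
                    intersect = (pred_c & gt_c).sum().item()
                    union = (pred_c | gt_c).sum().item()
                    iou_per_class.append(intersect / union)
            eval_miou_sum += np.nanmean(iou_per_class)
    return eval_loss_sum, eval_miou_sum
```

As in the PR, the caller is expected to divide both sums by the number of validation batches.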
47 changes: 24 additions & 23 deletions msaspp_dataloader.py
@@ -2,6 +2,7 @@
import random
import numpy as np
import torch
import torchvision

from torchvision.transforms import functional as F
from torch.utils.data import Dataset, DataLoader, dataloader
@@ -130,6 +131,14 @@ def __getitem__(self, index):
gt_copy[gt == key] = value
gt = Image.fromarray(gt_copy.astype(np.uint8))

if self.mode == 'eval':
image = np.array(image, dtype=np.float32) / 255.0
gt = np.array(gt, dtype=np.float32)

sample = {'image': image, 'gt': gt}
sample = self.transform(sample)
return data_name, sample

rescaled_image, rescaled_gt = self.resize_random_crop(image, gt, self.args.input_height, self.args.input_width)

rescaled_image = np.array(rescaled_image, dtype=np.float32) / 255.0
@@ -139,33 +148,25 @@ def __getitem__(self, index):
sample = {'image': image, 'gt': gt}
sample = self.transform(sample)

if self.mode == 'eval':
return data_name, sample

return sample

# elif self.mode == 'val':
# image = np.array(image, dtype=np.float32) / 255.0
# gt = np.array(gt, dtype=np.float32)

# sample = {'image': image, 'gt': gt}
# sample = self.transform(sample)
# return sample

# def rotate_image(self, img, angle, flag=Image.BILINEAR):
# result = img.rotate(angle, resample=flag)

# return result

def resize_random_crop(self, image, gt, height, width):
scaling = random.uniform(0.5, 2.0)
scale_w, scale_h = [int(i*scaling) for i in image.size]

resized_image = image.resize((scale_w, scale_h), Image.CUBIC)
resized_gt = gt.resize((scale_w, scale_h), Image.NEAREST)

i, j, h, w = transforms.RandomCrop.get_params(resized_image, output_size=(height, width))
crop_image = F.crop(resized_image, i, j, h, w)
crop_gt = F.crop(resized_gt, i, j, h, w)
x1 = random.randint(0, scale_w - width)
y1 = random.randint(0, scale_h - height)

crop_image = resized_image.crop((x1, y1, x1 + width, y1 + height))
crop_gt = resized_gt.crop((x1, y1, x1 + width, y1 + height))

# i, j, h, w = transforms.RandomCrop.get_params(resized_image, output_size=(height, width))
# crop_image = F.crop(resized_image, i, j, h, w)
# crop_gt = F.crop(resized_gt, i, j, h, w)

return crop_image, crop_gt
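Review note on the new crop: `random.randint(0, scale_w - width)` raises `ValueError` whenever the sampled scale shrinks the image below the crop window (`scale_w < width` or `scale_h < height`). A hedged sketch that clamps the rescaled size so the crop always fits, assuming PIL inputs as in this PR:

```python
import random
from PIL import Image

def resize_random_crop_sketch(image, gt, height, width):
    scaling = random.uniform(0.5, 2.0)
    # clamp so the crop window always fits inside the rescaled image
    scale_w = max(int(image.size[0] * scaling), width)
    scale_h = max(int(image.size[1] * scaling), height)

    resized_image = image.resize((scale_w, scale_h), Image.BICUBIC)
    resized_gt = gt.resize((scale_w, scale_h), Image.NEAREST)

    x1 = random.randint(0, scale_w - width)   # randint(0, 0) is valid
    y1 = random.randint(0, scale_h - height)

    crop_image = resized_image.crop((x1, y1, x1 + width, y1 + height))
    crop_gt = resized_gt.crop((x1, y1, x1 + width, y1 + height))
    return crop_image, crop_gt
```

`Image.BICUBIC` is used instead of the legacy `Image.CUBIC` alias, which recent Pillow releases have removed.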

@@ -185,13 +186,13 @@ def train_preprocess(self, image, gt):

def augment_image(self, image):
# gamma augmentation
# gamma = random.uniform(0.5, 2.0)
# image_aug = image ** gamma
gamma = random.uniform(0.9, 1.1)
image_aug = image ** gamma

# brightness augmentation
brightness = random.uniform(-10, 10)
image_aug = image * brightness
brightness = random.uniform(0.9, 1.1)
image_aug = image_aug * brightness

# color augmentation
# colors = np.random.uniform(0.9, 1.1, size=3)
# white = np.ones((image.shape[0], image.shape[1]))
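The revised jitter chains gamma and brightness, each drawn from [0.9, 1.1], fixing the old path where a brightness factor from [-10, 10] was multiplied into the raw image and the gamma result was discarded. A hedged sketch that also folds in the commented-out per-channel color jitter, assuming a float32 HxWx3 image scaled to [0, 1]:

```python
import random
import numpy as np

def augment_image_sketch(image):
    gamma = random.uniform(0.9, 1.1)
    image_aug = image ** gamma                    # gamma jitter

    brightness = random.uniform(0.9, 1.1)
    image_aug = image_aug * brightness            # chained onto the gamma result

    colors = np.random.uniform(0.9, 1.1, size=3)  # per-channel color jitter
    image_aug = image_aug * colors.reshape(1, 1, 3)

    return np.clip(image_aug, 0.0, 1.0)           # keep values in the valid range
```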
96 changes: 44 additions & 52 deletions msaspp_inference.py
@@ -14,45 +14,35 @@
from tqdm import tqdm

parser = argparse.ArgumentParser(description="msaspp image segmentation inference code")
parser.add_argument('--model_name', type=str, help='model name to be trained', default='denseaspp-v2')
parser.add_argument('--data_path', type=str, help='test data path', default=os.getcwd())
parser.add_argument('--input_height', type=int, help='input height', default=512)
parser.add_argument('--input_width', type=int, help='input width', default=512)
parser.add_argument('--batch_size', type=int, help='train batch size', default=8)
parser.add_argument('--checkpoint_path', type=str, help='path to a specific checkpoint to load', default=os.path.join(os.getcwd(), 'log'))
parser.add_argument('--num_checkpoint', type=str, help='model to be saved after training', default='model-0029000_mIoU-0.486.pth')
parser.add_argument('--num_seed', type=int, help='random seed number', default=1)
parser.add_argument('--num_threads', type=int, help='number of threads to use for data loading', default=5)
parser.add_argument('--gpu', type=int, help='GPU id to use', default=0)
parser.add_argument('--model_name', type=str, help='model name to be trained', default='denseaspp-v3')
parser.add_argument('--data_path', type=str, help='test data path', default=os.getcwd())
parser.add_argument('--input_height', type=int, help='input height', default=512)
parser.add_argument('--input_width', type=int, help='input width', default=512)
parser.add_argument('--batch_size', type=int, help='train batch size', default=8)
parser.add_argument('--log_directory', type=str, help='directory to save checkpoints and summaries', default=os.path.join(os.getcwd(), 'log'))
parser.add_argument('--num_checkpoint', type=str, help='model to be saved after training', default='model-0029500_mIoU-0.445.pth')
parser.add_argument('--num_seed', type=int, help='random seed number', default=1)
parser.add_argument('--num_threads', type=int, help='number of threads to use for data loading', default=5)
parser.add_argument('--gpu', type=int, help='GPU id to use', default=0)
args = parser.parse_args()

def save_prediction(prediction):
command = 'mkdir ' + os.path.join(os.getcwd(), "prediction_image")
os.system(command)

plt.figure(figsize=(40, 10))
for i in range(19):
pred = prediction[:, i, :].permute(1, 2, 0)
pred = pred.detach().cpu().numpy()
plt.subplot(1, 19, i+1)
plt.imshow(pred)
plt.axis('off')

plt.savefig(os.path.join(os.getcwd(), "prediction_image", "pred_imageset.png"), dpi=400, bbox_inches='tight')
print('Saving done')

def cover_colormap(image):
def check_directory(path):
if not os.path.exists(path):
os.mkdir(path)

def fill_colormap(prediction):
COLOR_MAP = [(128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156), (190, 153, 153), (153, 153, 153),
(250, 170, 30), (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60),
(255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), (0, 0, 230), (119, 11, 32)]
(250, 170, 30), (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60),
(255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), (0, 0, 230), (119, 11, 32)]

image = image.data.cpu().numpy()
# image = image[0, :]
image = image.argmax(axis=1)[0]
row, col = image.shape
prediction = prediction.data.cpu().numpy()
# prediction = prediction[0, :]
prediction = prediction.argmax(axis=1)[0]
row, col = prediction.shape
dst = np.zeros((row, col, 3), dtype=np.uint8)
for i in range(19):
dst[image == i] = COLOR_MAP[i]
dst[prediction == i] = COLOR_MAP[i]
dst = np.array(dst, dtype=np.uint8)
dst = cv2.cvtColor(dst, cv2.COLOR_RGB2BGR)
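`fill_colormap` paints the palette with a Python loop over the 19 classes; a hedged, vectorized equivalent using numpy fancy indexing, taking the `COLOR_MAP` list above as a parameter and assuming the same `[N, 19, H, W]` logits input:

```python
import numpy as np
import cv2

def fill_colormap_vectorized(prediction, color_map):
    palette = np.array(color_map, dtype=np.uint8)             # 19 x 3 RGB palette
    labels = prediction.data.cpu().numpy().argmax(axis=1)[0]  # H x W class ids
    dst = palette[labels]                                     # H x W x 3 lookup
    return cv2.cvtColor(dst, cv2.COLOR_RGB2BGR)               # match the BGR output
```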

@@ -78,46 +68,48 @@ def get_iou(prediction, gt):
return iou_per_class

def test(args):
dataloader_val = msasppDataLoader(args, mode='val')
dataloader_eval = msasppDataLoader(args, mode='eval')

model = DenseASPP(args, model_cfg=DenseASPP121.Model_CFG)
torch.cuda.set_device(args.gpu)
model = torch.nn.DataParallel(model, device_ids=[args.gpu])

model_path = os.path.join(args.checkpoint_path, args.model_name, 'eval_model', args.num_checkpoint)
model_path = os.path.join(args.log_directory, args.model_name, 'eval_model', args.num_checkpoint)
checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint['model'])
model.eval()
model.cuda(args.gpu)

with torch.no_grad():
for idx, sample in enumerate(dataloader_val.data):

for idx, (name, sample) in enumerate(dataloader_eval.data):
image = sample['image'].cuda(args.gpu)
gt = sample['gt'].cuda(args.gpu)

prediction = model(image)

prediction = F.log_softmax(prediction, dim=1)
iou_per_class = get_iou(prediction, gt)
if prediction.size()[0] < 1024:
prediction = F.upsample(prediction, size=(1024, 2048), mode='bilinear')
mIoU = np.nanmean(iou_per_class)
print(mIoU)

# color_prediction = cover_colormap(prediction)
color_prediction = fill_colormap(prediction)
print("idx: {}, data name: {}, mIoU: {}".format(idx+1, *name, mIoU))

# plt.suptitle("{}".format(*data_name))
# plt.figure(figsize=(10, 5))
# plt.subplot(1, 2, 1)
# plt.imshow(color_prediction)
# plt.axis('off')
check_directory(path=os.path.join(args.log_directory, args.model_name, 'prediction_image'))
plt.figure(figsize=(18, 5))
plt.suptitle("name: {} - mIoU: {}".format(*name, mIoU))
plt.subplot(1, 2, 1)
plt.imshow(color_prediction)
plt.axis('off')

# plt_img = image.squeeze().permute(1, 2, 0)
# plt_img = plt_img.data.cpu().numpy()
# plt.subplot(1, 2, 2)
# plt.imshow(plt_img)
# plt.axis('off')

# plt.savefig('{}_color.png'.format(*data_name), dpi=400, bbox_inches='tight')
a=1
plt_img = image.squeeze().permute(1, 2, 0)
plt_img = plt_img.data.cpu().numpy()
plt.subplot(1, 2, 2)
plt.imshow(plt_img)
plt.axis('off')
plt.savefig(os.path.join(args.log_directory, args.model_name, 'prediction_image', '{}_color'.format(*name)), dpi=400, bbox_inches='tight')

if __name__ == "__main__":
test(args)
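Two review notes on the loop in `test()`: `prediction.size()[0]` is the batch dimension, so the `< 1024` check never inspects the spatial size, and `F.upsample` has long been deprecated in PyTorch in favor of `F.interpolate`. A minimal sketch of the resize guard under those assumptions:

```python
import torch.nn.functional as F

def resize_to_label_resolution(prediction, size=(1024, 2048)):
    # compare the spatial dims (H, W), not the batch dim
    if tuple(prediction.shape[2:]) != size:
        prediction = F.interpolate(prediction, size=size, mode='bilinear',
                                   align_corners=False)
    return prediction
```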