2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
# Multi-Scale Atrous Spatial Pyramid Pooling for Larger and Denser receptive field in Semantic Segmentation
`# Multi-Scale Atrous Spatial Pyramid Pooling for Larger and Denser receptive field in Semantic Segmentation

## Introduction
This repository extends the code of the paper "DenseASPP for Semantic Segmentation in Street Scenes". The paper's authors released only the proposed model code, without training code, so our team implemented the training pipeline. Beyond a plain training implementation, we also propose our own contribution.
1 change: 0 additions & 1 deletion denseASPP.py
@@ -10,7 +10,6 @@ class DenseASPP(nn.Module):
* output_scale can only set as 8 or 16
"""
def __init__(self, args, model_cfg, n_class=19, output_stride=8):
# def __init__(self, model_cfg, n_class=19, output_stride=8):
super(DenseASPP, self).__init__()
# bn_size = model_cfg['bn_size']
bn_size = args.batch_size
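Review note on `bn_size = args.batch_size`: in DenseNet-style model configs, `bn_size` is the bottleneck width multiplier of each dense layer (typically 4), not the batch size, so this change couples the architecture width to the training batch size. A minimal alternative sketch, assuming `model_cfg` still carries the original key:

```python
# bn_size is the DenseNet bottleneck multiplier (usually 4); keeping it in the
# model config leaves the architecture independent of the training batch size.
bn_size = model_cfg.get('bn_size', 4)
```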
53 changes: 45 additions & 8 deletions miscellaneous/misc.py
@@ -1,14 +1,14 @@
import os
from math import ceil

import torch.nn.functional as F
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.autograd import Variable
import math
import os

from sklearn.metrics import confusion_matrix
import math
from torch.autograd import Variable
from tqdm import tqdm
from math import ceil
from torch import nn

def check_mkdir(dir_name):
if not os.path.exists(dir_name):
@@ -404,4 +404,41 @@ def wrapper(self, x):
outputs_all_scales += outputs
return outputs_all_scales

return wrapper
return wrapper

def evaluate_model(args, val_dataloader, model, criterion):
eval_miou_sum = 0
# sum_iou_class = 0
eval_loss_sum = 0
for sampled_eval in tqdm(val_dataloader.data):
with torch.no_grad():
eval_image = sampled_eval['image'].cuda(args.gpu, non_blocking=True)
eval_gt = sampled_eval['gt'].cuda(args.gpu, non_blocking=True)

eval_output = model(eval_image)
model.eval()
eval_loss = criterion(eval_output, eval_gt)

eval_output = F.softmax(eval_output, dim=1)
eval_output = torch.argmax(eval_output, dim=1)
eval_output = eval_output.contiguous().view(-1)
eval_gt = eval_gt.contiguous().view(-1)

iou_per_class = []
for num_class in range(len(val_dataloader.class_names)):
true_class = (eval_output == num_class)
true_label = (eval_gt == num_class)
if true_label.long().sum().item() == 0:
iou_per_class.append(np.nan)
else:
intersect = torch.logical_and(true_class, true_label).sum().float().item()
union = torch.logical_or(true_class, true_label).sum().float().item()

iou = (intersect + 1e-10) / (union + 1e-10)
iou_per_class.append(iou)

eval_miou_sum += np.nanmean(iou_per_class)
# sum_iou_class += sum(iou_per_class)
eval_loss_sum += eval_loss

return eval_loss_sum, eval_miou_sum
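Review note on `evaluate_model`: `model.eval()` is invoked only after the first forward pass inside the loop, and `eval_loss_sum += eval_loss` accumulates a CUDA tensor rather than a Python scalar. A minimal sketch of the same loop with eval mode set up front and scalar accumulation, assuming the `val_dataloader.data` and sample-dict interface used in this PR:

```python
import numpy as np
import torch
from tqdm import tqdm

def evaluate_model_sketch(args, val_dataloader, model, criterion):
    model.eval()  # disable dropout / batch-norm updates before any forward pass
    eval_loss_sum, eval_miou_sum = 0.0, 0.0
    with torch.no_grad():
        for sample in tqdm(val_dataloader.data):
            image = sample['image'].cuda(args.gpu, non_blocking=True)
            gt = sample['gt'].cuda(args.gpu, non_blocking=True)

            output = model(image)
            eval_loss_sum += criterion(output, gt).item()  # .item() drops tensor refs

            pred = output.argmax(dim=1).view(-1)  # softmax is monotonic; argmax suffices
            gt_flat = gt.view(-1)

            iou_per_class = []
            for c in range(len(val_dataloader.class_names)):
                pred_c, gt_c = pred == c, gt_flat == c
                if gt_c.sum().item() == 0:
                    iou_per_class.append(np.nan)  # class absent in this batch
                else:
                    intersect = (pred_c & gt_c).sum().item()
                    union = (pred_c | gt_c).sum().item()
                    iou_per_class.append(intersect / union)
            eval_miou_sum += np.nanmean(iou_per_class)
    return eval_loss_sum, eval_miou_sum
```

As in the PR, the caller is expected to divide both sums by the number of validation batches.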
47 changes: 24 additions & 23 deletions msaspp_dataloader.py
@@ -2,6 +2,7 @@
import random
import numpy as np
import torch
import torchvision

from torchvision.transforms import functional as F
from torch.utils.data import Dataset, DataLoader, dataloader
@@ -130,6 +131,14 @@ def __getitem__(self, index):
gt_copy[gt == key] = value
gt = Image.fromarray(gt_copy.astype(np.uint8))

if self.mode == 'eval':
image = np.array(image, dtype=np.float32) / 255.0
gt = np.array(gt, dtype=np.float32)

sample = {'image': image, 'gt': gt}
sample = self.transform(sample)
return data_name, sample

rescaled_image, rescaled_gt = self.resize_random_crop(image, gt, self.args.input_height, self.args.input_width)

rescaled_image = np.array(rescaled_image, dtype=np.float32) / 255.0
@@ -139,33 +148,25 @@ def __getitem__(self, index):
sample = {'image': image, 'gt': gt}
sample = self.transform(sample)

if self.mode == 'eval':
return data_name, sample

return sample

# elif self.mode == 'val':
# image = np.array(image, dtype=np.float32) / 255.0
# gt = np.array(gt, dtype=np.float32)

# sample = {'image': image, 'gt': gt}
# sample = self.transform(sample)
# return sample

# def rotate_image(self, img, angle, flag=Image.BILINEAR):
# result = img.rotate(angle, resample=flag)

# return result

def resize_random_crop(self, image, gt, height, width):
scaling = random.uniform(0.5, 2.0)
scale_w, scale_h = [int(i*scaling) for i in image.size]

resized_image = image.resize((scale_w, scale_h), Image.CUBIC)
resized_gt = gt.resize((scale_w, scale_h), Image.NEAREST)

i, j, h, w = transforms.RandomCrop.get_params(resized_image, output_size=(height, width))
crop_image = F.crop(resized_image, i, j, h, w)
crop_gt = F.crop(resized_gt, i, j, h, w)
x1 = random.randint(0, scale_w - width)
y1 = random.randint(0, scale_h - height)

crop_image = resized_image.crop((x1, y1, x1 + width, y1 + height))
crop_gt = resized_gt.crop((x1, y1, x1 + width, y1 + height))

# i, j, h, w = transforms.RandomCrop.get_params(resized_image, output_size=(height, width))
# crop_image = F.crop(resized_image, i, j, h, w)
# crop_gt = F.crop(resized_gt, i, j, h, w)

return crop_image, crop_gt
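Review note on the new crop: `random.randint(0, scale_w - width)` raises `ValueError` whenever the sampled scale shrinks the image below the crop window (`scale_w < width` or `scale_h < height`). A hedged sketch that clamps the rescaled size so the crop always fits, assuming PIL inputs as in this PR:

```python
import random
from PIL import Image

def resize_random_crop_sketch(image, gt, height, width):
    scaling = random.uniform(0.5, 2.0)
    # clamp so the crop window always fits inside the rescaled image
    scale_w = max(int(image.size[0] * scaling), width)
    scale_h = max(int(image.size[1] * scaling), height)

    resized_image = image.resize((scale_w, scale_h), Image.BICUBIC)
    resized_gt = gt.resize((scale_w, scale_h), Image.NEAREST)

    x1 = random.randint(0, scale_w - width)   # randint(0, 0) is valid
    y1 = random.randint(0, scale_h - height)

    crop_image = resized_image.crop((x1, y1, x1 + width, y1 + height))
    crop_gt = resized_gt.crop((x1, y1, x1 + width, y1 + height))
    return crop_image, crop_gt
```

`Image.BICUBIC` is used instead of the legacy `Image.CUBIC` alias, which recent Pillow releases have removed.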

@@ -185,13 +186,13 @@ def train_preprocess(self, image, gt):

def augment_image(self, image):
# gamma augmentation
# gamma = random.uniform(0.5, 2.0)
# image_aug = image ** gamma
gamma = random.uniform(0.9, 1.1)
image_aug = image ** gamma

# brightness augmentation
brightness = random.uniform(-10, 10)
image_aug = image * brightness
brightness = random.uniform(0.9, 1.1)
image_aug = image_aug * brightness

# color augmentation
# colors = np.random.uniform(0.9, 1.1, size=3)
# white = np.ones((image.shape[0], image.shape[1]))
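The revised jitter chains gamma and brightness, each drawn from [0.9, 1.1], fixing the old path where a brightness factor from [-10, 10] was multiplied into the raw image and the gamma result was discarded. A hedged sketch that also folds in the commented-out per-channel color jitter, assuming a float32 HxWx3 image scaled to [0, 1]:

```python
import random
import numpy as np

def augment_image_sketch(image):
    gamma = random.uniform(0.9, 1.1)
    image_aug = image ** gamma                    # gamma jitter

    brightness = random.uniform(0.9, 1.1)
    image_aug = image_aug * brightness            # chained onto the gamma result

    colors = np.random.uniform(0.9, 1.1, size=3)  # per-channel color jitter
    image_aug = image_aug * colors.reshape(1, 1, 3)

    return np.clip(image_aug, 0.0, 1.0)           # keep values in the valid range
```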
96 changes: 44 additions & 52 deletions msaspp_inference.py
@@ -14,45 +14,35 @@
from tqdm import tqdm

parser = argparse.ArgumentParser(description="msaspp image segmentation inference code")
parser.add_argument('--model_name', type=str, help='model name to be trained', default='denseaspp-v2')
parser.add_argument('--data_path', type=str, help='test data path', default=os.getcwd())
parser.add_argument('--input_height', type=int, help='input height', default=512)
parser.add_argument('--input_width', type=int, help='input width', default=512)
parser.add_argument('--batch_size', type=int, help='train batch size', default=8)
parser.add_argument('--checkpoint_path', type=str, help='path to a specific checkpoint to load', default=os.path.join(os.getcwd(), 'log'))
parser.add_argument('--num_checkpoint', type=str, help='model to be saved after training', default='model-0029000_mIoU-0.486.pth')
parser.add_argument('--num_seed', type=int, help='random seed number', default=1)
parser.add_argument('--num_threads', type=int, help='number of threads to use for data loading', default=5)
parser.add_argument('--gpu', type=int, help='GPU id to use', default=0)
parser.add_argument('--model_name', type=str, help='model name to be trained', default='denseaspp-v3')
parser.add_argument('--data_path', type=str, help='test data path', default=os.getcwd())
parser.add_argument('--input_height', type=int, help='input height', default=512)
parser.add_argument('--input_width', type=int, help='input width', default=512)
parser.add_argument('--batch_size', type=int, help='train batch size', default=8)
parser.add_argument('--log_directory', type=str, help='directory to save checkpoints and summaries', default=os.path.join(os.getcwd(), 'log'))
parser.add_argument('--num_checkpoint', type=str, help='model to be saved after training', default='model-0029500_mIoU-0.445.pth')
parser.add_argument('--num_seed', type=int, help='random seed number', default=1)
parser.add_argument('--num_threads', type=int, help='number of threads to use for data loading', default=5)
parser.add_argument('--gpu', type=int, help='GPU id to use', default=0)
args = parser.parse_args()

def save_prediction(prediction):
command = 'mkdir ' + os.path.join(os.getcwd(), "prediction_image")
os.system(command)

plt.figure(figsize=(40, 10))
for i in range(19):
pred = prediction[:, i, :].permute(1, 2, 0)
pred = pred.detach().cpu().numpy()
plt.subplot(1, 19, i+1)
plt.imshow(pred)
plt.axis('off')

plt.savefig(os.path.join(os.getcwd(), "prediction_image", "pred_imageset.png"), dpi=400, bbox_inches='tight')
print('Saving done')

def cover_colormap(image):
def check_directory(path):
if not os.path.exists(path):
os.mkdir(path)

def fill_colormap(prediction):
COLOR_MAP = [(128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156), (190, 153, 153), (153, 153, 153),
(250, 170, 30), (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60),
(255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), (0, 0, 230), (119, 11, 32)]
(250, 170, 30), (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60),
(255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), (0, 0, 230), (119, 11, 32)]

image = image.data.cpu().numpy()
# image = image[0, :]
image = image.argmax(axis=1)[0]
row, col = image.shape
prediction = prediction.data.cpu().numpy()
# prediction = prediction[0, :]
prediction = prediction.argmax(axis=1)[0]
row, col = prediction.shape
dst = np.zeros((row, col, 3), dtype=np.uint8)
for i in range(19):
dst[image == i] = COLOR_MAP[i]
dst[prediction == i] = COLOR_MAP[i]
dst = np.array(dst, dtype=np.uint8)
dst = cv2.cvtColor(dst, cv2.COLOR_RGB2BGR)
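`fill_colormap` paints the palette with a Python loop over the 19 classes; a hedged, vectorized equivalent using numpy fancy indexing, taking the `COLOR_MAP` list above as a parameter and assuming the same `[N, 19, H, W]` logits input:

```python
import numpy as np
import cv2

def fill_colormap_vectorized(prediction, color_map):
    palette = np.array(color_map, dtype=np.uint8)             # 19 x 3 RGB palette
    labels = prediction.data.cpu().numpy().argmax(axis=1)[0]  # H x W class ids
    dst = palette[labels]                                     # H x W x 3 lookup
    return cv2.cvtColor(dst, cv2.COLOR_RGB2BGR)               # match the BGR output
```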

@@ -78,46 +68,48 @@ def get_iou(prediction, gt):
return iou_per_class

def test(args):
dataloader_val = msasppDataLoader(args, mode='val')
dataloader_eval = msasppDataLoader(args, mode='eval')

model = DenseASPP(args, model_cfg=DenseASPP121.Model_CFG)
torch.cuda.set_device(args.gpu)
model = torch.nn.DataParallel(model, device_ids=[args.gpu])

model_path = os.path.join(args.checkpoint_path, args.model_name, 'eval_model', args.num_checkpoint)
model_path = os.path.join(args.log_directory, args.model_name, 'eval_model', args.num_checkpoint)
checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint['model'])
model.eval()
model.cuda(args.gpu)

with torch.no_grad():
for idx, sample in enumerate(dataloader_val.data):

for idx, (name, sample) in enumerate(dataloader_eval.data):
image = sample['image'].cuda(args.gpu)
gt = sample['gt'].cuda(args.gpu)

prediction = model(image)

prediction = F.log_softmax(prediction, dim=1)
iou_per_class = get_iou(prediction, gt)
if prediction.size()[0] < 1024:
prediction = F.upsample(prediction, size=(1024, 2048), mode='bilinear')
mIoU = np.nanmean(iou_per_class)
print(mIoU)

# color_prediction = cover_colormap(prediction)
color_prediction = fill_colormap(prediction)
print("idx: {}, data name: {}, mIoU: {}".format(idx+1, *name, mIoU))

# plt.suptitle("{}".format(*data_name))
# plt.figure(figsize=(10, 5))
# plt.subplot(1, 2, 1)
# plt.imshow(color_prediction)
# plt.axis('off')
check_directory(path=os.path.join(args.log_directory, args.model_name, 'prediction_image'))
plt.figure(figsize=(18, 5))
plt.suptitle("name: {} - mIoU: {}".format(*name, mIoU))
plt.subplot(1, 2, 1)
plt.imshow(color_prediction)
plt.axis('off')

# plt_img = image.squeeze().permute(1, 2, 0)
# plt_img = plt_img.data.cpu().numpy()
# plt.subplot(1, 2, 2)
# plt.imshow(plt_img)
# plt.axis('off')

# plt.savefig('{}_color.png'.format(*data_name), dpi=400, bbox_inches='tight')
a=1
plt_img = image.squeeze().permute(1, 2, 0)
plt_img = plt_img.data.cpu().numpy()
plt.subplot(1, 2, 2)
plt.imshow(plt_img)
plt.axis('off')
plt.savefig(os.path.join(args.log_directory, args.model_name, 'prediction_image', '{}_color'.format(*name)), dpi=400, bbox_inches='tight')

if __name__ == "__main__":
test(args)
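Two review notes on the loop in `test()`: `prediction.size()[0]` is the batch dimension, so the `< 1024` check never inspects the spatial size, and `F.upsample` has long been deprecated in PyTorch in favor of `F.interpolate`. A minimal sketch of the resize guard under those assumptions:

```python
import torch.nn.functional as F

def resize_to_label_resolution(prediction, size=(1024, 2048)):
    # compare the spatial dims (H, W), not the batch dim
    if tuple(prediction.shape[2:]) != size:
        prediction = F.interpolate(prediction, size=size, mode='bilinear',
                                   align_corners=False)
    return prediction
```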