From ec0006dd8363563fcbe1d102215eebbad8f49172 Mon Sep 17 00:00:00 2001 From: SunWng <63491090+SunWng@users.noreply.github.com> Date: Sun, 22 Aug 2021 15:47:52 +0900 Subject: [PATCH 01/12] Initial commit --- README.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..d4b2185 --- /dev/null +++ b/README.md @@ -0,0 +1,2 @@ +# SpyGR-segmentation +SpyGR code after paper review From 414408eb0bc2792379a7b72d08d5de4240bde6bb Mon Sep 17 00:00:00 2001 From: SunWng Date: Sun, 22 Aug 2021 15:56:32 +0900 Subject: [PATCH 02/12] 2021-08-22 initial commit --- spygr.py | 118 ++++++++++++++++++++++++++++++ spygr_dataloader.py | 0 spygr_main.py | 0 spygr_temp.py | 171 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 289 insertions(+) create mode 100644 spygr.py create mode 100644 spygr_dataloader.py create mode 100644 spygr_main.py create mode 100644 spygr_temp.py diff --git a/spygr.py b/spygr.py new file mode 100644 index 0000000..c08f10a --- /dev/null +++ b/spygr.py @@ -0,0 +1,118 @@ +import numpy as np +import pandas as pd + +import gc +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torch import Tensor +import torchvision.models as models + +class GRModule(nn.Module): + def __init__(self, x, device): + super().__init__() + self.x = x + self.device = device + self.M = 64 + self.phi_conv = nn.Conv2d(self.x.shape[1], self.M, kernel_size=3, stride=1, padding=1, bias=True) + self.glob_pool = nn.AvgPool2d(self.x.shape[2], self.x.shape[3]) + self.glob_conv = nn.Conv2d(self.x.shape[1], self.M, kernel_size=1, stride=1, padding=0, bias=False) + self.graph_weight = nn.Conv2d(self.x.shape[1], self.x.shape[1], kernel_size=1, stride=1, padding=0, bias=False) + self.relu = nn.ReLU() + + def forward(self): + x_phi_conv = self.phi_conv(self.x) + x_phi = x_phi_conv.view([x_phi_conv.shapep[0], -1, self.M]) + x_phi = self.relu(x_phi) + x_phi_T = x_phi_conv.view([x_phi_conv.shape[0], self.M, -1]) + x_phi_T = self.relu(x_phi_T) + + x_glob_pool = self.glob_pool(self.x) + x_glob_conv = self.glob_conv(x_glob_pool) + x_glob_diag = torch.zeros(x_glob_conv.shape[0], x_glob_conv.shape[1], x_glob_conv[1]).to(self.device) + + for i in range(x_glob_conv.shape[0]): + x_glob_diag[i, :, :] = torch.diag(x_glob_conv[i, :, :, :].reshape(1, x_glob_conv.shape[1])) + + A_tilde = torch.matmul(torch.matmul(x_phi, x_glob_diag), x_phi_T) + # D_tilde = torch.zeros_like(A_tilde).to(self.device) + D_sqrt_inv = torch.zeros_like(A_tilde).to(self.device) + + # temp_sum = torch.sum(A_tilde, 2) + # for i in range(D_tilde.shape[0]): + # D_tilde[i, :, :] = torch.diag(temp_sum[i, :]) + + diag_sum = torch.sum(A_tilde, 2) + + for i in range(diag_sum.shape[0]): + diag_sqrt = 1.0 / torch.sqrt(diag_sum[i, :]) + diag_sqrt[torch.isnan(diag_sqrt)] = 0 + D_sqrt_inv[i, :, :] = torch.diag(diag_sqrt) + + I = torch.eye(D_sqrt_inv.shape[1]).to(self.device) + I = I.repeat(D_sqrt_inv.shape[0], 1, 1) + + L_tilde = I - torch.matmul(torch.matmul(D_sqrt_inv, A_tilde), D_sqrt_inv) + + out = torch.matmul(L_tilde, self.x.reshape(self.x.shape[0], -1, self.x.shape[1])) + out = out.reshape(self.x.shape[0], self.x.shape[1], self.x.shape[2], self.x.shape[3]) + out = self.graph_weight(out) + out = self.relu(out) + + return out + +class upconv(nn.Module): + def __init__(self, in_channels, out_chennels, ratio): + super().__init__() + self.elu = nn.ELU() + self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_chennels, 
bias=False, kernel_size=3, stride=1, padding=1) + self.ratio = ratio + + def forward(self, x): + up_x = F.interpolate(x, scale_factor=self.ratio, mode="nearest") + out = self.conv(up_x) + out = self.elu(out) + + return out + +class SpyGR(nn.Module): + def __init__(self, device): + super().__init__() + self.device = device + self.pretrained_resnet = models.resnet50(pretrained=True).to(self.device) + + for param in self.pretrained_resnet.parameters(): + param.requires_grad_(False) + + self.reduce_dim = nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1, bias=False) + self.down_samp = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) + + def forward(self, x): + x = self.pretrained_resnet.conv1(x) + x = self.pretrained_resnet.bn1(x) + x = self.pretrained_resnet.relu(x) + x = self.pretrained_resnet.maxpool(x) + x = self.pretrained_resnet.layer1(x) + x = self.pretrained_resnet.layer2(x) + x = self.pretrained_resnet.layer3(x) + x = self.pretrained_resnet.layer4(x) + x = self.reduce_dim(x) + + GR_1 = GRModule(x, self.device).to(self.device) + x = self.down_samp(x) + GR_2 = GRModule(x, self.device).to(self.device) + x = self.down_samp(x) + GR_3 = GRModule(x, self.device).to(self.device) + + x_gr_1 = GR_1.forward() + x_gr_2 = GR_2.forward() + x_gr_3 = GR_3.forward() + + out = x_gr_2 + F.interpolate(x_gr_3, scale_factor=2, mode="nearest") + out = x_gr_1 + F.interpolate(out, scale_factor=2, mode="nearest") + + final_upsampling = upconv(512, 3, ratio=64).to(self.device) + out = final_upsampling.forward(out) + + return out \ No newline at end of file diff --git a/spygr_dataloader.py b/spygr_dataloader.py new file mode 100644 index 0000000..e69de29 diff --git a/spygr_main.py b/spygr_main.py new file mode 100644 index 0000000..e69de29 diff --git a/spygr_temp.py b/spygr_temp.py new file mode 100644 index 0000000..df5a4d7 --- /dev/null +++ b/spygr_temp.py @@ -0,0 +1,171 @@ +import os +import sys +import csv +import numpy as np +import pandas as pd +import scipy +import sklearn +from sklearn.model_selection import KFold +import matplotlib +import matplotlib.pyplot as plt +from PIL import Image +import gc +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torch import Tensor +from torch.utils.data import DataLoader, Dataset +import torchvision +import torchvision.transforms as T +import torchvision.models as models +from torchvision import transforms + +class SpygrModule(nn.Module): + def __init__(self, x, device): + super().__init__() + self.x = x + self.x.to(device) + self.M = 64 + self.device = device + self.phi_conv = nn.Conv2d(x.shape[1], self.M, kernel_size=3, stride=1, padding=1, bias=True) + self.glob_pool = nn.AvgPool2d((x.shape[2], x.shape[3])) + self.glob_conv = nn.Conv2d(x.shape[1], self.M, kernel_size=1, stride=1, padding=0, bias=False) + self.graph_weight = nn.Conv2d(self.x.shape[1], self.x.shape[1], kernel_size=1, stride=1, padding=0, bias=False) + self.ReLu = nn.ReLU() + + def forward(self): + x_phi_conv = self.phi_conv(self.x) + x_phi = x_phi_conv.view([x_phi_conv.shape[0], -1, self.M]) + x_phi = self.ReLu(x_phi) + x_phi_T = x_phi_conv.view([x_phi_conv.shape[0], self.M, -1]) + x_phi_T = self.ReLu(x_phi_T) + + x_glob_pool = self.glob_pool(self.x) + x_glob_conv = self.glob_conv(x_glob_pool) + x_glob_diag = torch.zeros(x_glob_conv.shape[0], x_glob_conv.shape[1], x_glob_conv.shape[1]).to(self.device) + + for i in range(x_glob_conv.shape[0]): + x_glob_diag[i, :, :] = torch.diag(x_glob_conv[i, :, :, :].reshape(1, x_glob_conv.shape[1])) + + 
A_tilde = torch.matmul(torch.matmul(x_phi, x_glob_diag), x_phi_T) + # print(A_tilde) + D_tilde = torch.zeros_like(A_tilde).to(self.device) + temp_sum = torch.sum(A_tilde, 2) + for i in range(D_tilde.shape[0]): + D_tilde[i, :, :] = torch.diag(temp_sum[i, :]) + # D_tilde = torch.diag(torch.sum(A_tilde, 2)) + + # D_inv = torch.inverse(torch.sqrt(D_tilde)) + # D_inv = torch.linalg.inv(torch.sqrt(D_tilde)) + D_inv = D_tilde - A_tilde ## To Fix + + I = torch.eye(D_inv.shape[1]).to(self.device) + I = I.repeat(D_inv.shape[0], 1, 1) + + L_tilde = I - torch.matmul(torch.matmul(D_inv, A_tilde), D_inv) + + output = torch.matmul(L_tilde, self.x.reshape(self.x.shape[0], -1, self.x.shape[1])) + # print(output) + output = output.reshape(self.x.shape[0], self.x.shape[1], self.x.shape[2], self.x.shape[3]) + output = self.graph_weight(output) + output = self.ReLu(output) + + return output + +class upconv(nn.Module): + def __init__(self, in_channels, out_channels, ratio=2): + super().__init__() + self.elu = nn.ELU() + self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, bias=False, kernel_size=3, stride=1, padding=1) + self.ratio = ratio + + def forward(self, x): + up_x = F.interpolate(x, scale_factor=self.ratio, mode='nearest') + out = self.conv(up_x) + out = self.elu(out) + return out + +class SpyGRSS(nn.Module): + def __init__(self, device): + super().__init__() + + self.device = device + + self.pretrained_resnet = models.resnet50(pretrained=True).to(self.device) + + for param in self.pretrained_resnet.parameters(): + param.requires_grad_(False) + + ## Layer ## + self.reduce_dim = nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1, bias=False) + + self.down_samp = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) + + def forward(self, x): + x = self.pretrained_resnet.conv1(x) + x = self.pretrained_resnet.bn1(x) + x = self.pretrained_resnet.relu(x) + x = self.pretrained_resnet.maxpool(x) + x = self.pretrained_resnet.layer1(x) + x = self.pretrained_resnet.layer2(x) + x = self.pretrained_resnet.layer3(x) + x = self.pretrained_resnet.layer4(x) + x = self.reduce_dim(x) + + GR_1 = SpygrModule(x, self.device).to(self.device) + + x = self.down_samp(x) + + GR_2 = SpygrModule(x, self.device).to(self.device) + + x = self.down_samp(x) + + GR_3 = SpygrModule(x, self.device).to(self.device) + + x_gr_1 = GR_1.forward() + x_gr_2 = GR_2.forward() + x_gr_3 = GR_3.forward() + output = x_gr_2 + F.interpolate(x_gr_3, scale_factor=2, mode="nearest") + output = x_gr_1 + F.interpolate(output, scale_factor=2, mode="nearest") + + final_upsampling = upconv(512, 3, ratio=16).to(self.device) + output = final_upsampling.forward(output) + + return output + +def main(): + gc.collect() + torch.cuda.empty_cache() + device = torch.device("cuda") + + print(torch.cuda.is_available()) + print(torch.device('cuda:0')) + + temp = Image.open("D:/dataset/gtFine_trainvaltest/gtFine/train/aachen/aachen_000000_000019_gtFine_color.png").convert("RGB") + temp_img = np.array(temp) + temp_tf = T.Compose([ + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + temp_img = temp_tf(temp) + temp_img = torch.stack([temp_img, temp_img]).to(device) + + # temp_model = SpyGRSS(device) + temp_model = SpyGRSS(device) + temp_model.to(device) + temp_model.cuda() + output = temp_model.forward(temp_img) + # print(output.shape) + temp_tff = T.ToPILImage() + output = temp_tff(output[0, :, :, :]) + output.show() + + # temp_test = torch.randn(3, 3, 3) + # temp_test.to(device) + # print("success") + + + +if 
__name__ == "__main__": + main() \ No newline at end of file From 3e17df27e0dd9418fb22b7e97845be95bc5be675 Mon Sep 17 00:00:00 2001 From: SunWng Date: Sun, 22 Aug 2021 20:43:02 +0900 Subject: [PATCH 03/12] 2021-08-22 Model Debug Completed --- spygr.py | 35 ++++++++++++++++++---- spygr_dataloader.py | 63 ++++++++++++++++++++++++++++++++++++++++ spygr_main.py | 71 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 5 deletions(-) diff --git a/spygr.py b/spygr.py index c08f10a..5a14392 100644 --- a/spygr.py +++ b/spygr.py @@ -9,6 +9,9 @@ from torch import Tensor import torchvision.models as models +from PIL import Image +import torchvision.transforms as T + class GRModule(nn.Module): def __init__(self, x, device): super().__init__() @@ -23,14 +26,14 @@ def __init__(self, x, device): def forward(self): x_phi_conv = self.phi_conv(self.x) - x_phi = x_phi_conv.view([x_phi_conv.shapep[0], -1, self.M]) + x_phi = x_phi_conv.view([x_phi_conv.shape[0], -1, self.M]) x_phi = self.relu(x_phi) x_phi_T = x_phi_conv.view([x_phi_conv.shape[0], self.M, -1]) x_phi_T = self.relu(x_phi_T) x_glob_pool = self.glob_pool(self.x) x_glob_conv = self.glob_conv(x_glob_pool) - x_glob_diag = torch.zeros(x_glob_conv.shape[0], x_glob_conv.shape[1], x_glob_conv[1]).to(self.device) + x_glob_diag = torch.zeros(x_glob_conv.shape[0], x_glob_conv.shape[1], x_glob_conv.shape[1]).to(self.device) for i in range(x_glob_conv.shape[0]): x_glob_diag[i, :, :] = torch.diag(x_glob_conv[i, :, :, :].reshape(1, x_glob_conv.shape[1])) @@ -48,13 +51,13 @@ def forward(self): for i in range(diag_sum.shape[0]): diag_sqrt = 1.0 / torch.sqrt(diag_sum[i, :]) diag_sqrt[torch.isnan(diag_sqrt)] = 0 + diag_sqrt[torch.isinf(diag_sqrt)] = 0 D_sqrt_inv[i, :, :] = torch.diag(diag_sqrt) - + I = torch.eye(D_sqrt_inv.shape[1]).to(self.device) I = I.repeat(D_sqrt_inv.shape[0], 1, 1) L_tilde = I - torch.matmul(torch.matmul(D_sqrt_inv, A_tilde), D_sqrt_inv) - out = torch.matmul(L_tilde, self.x.reshape(self.x.shape[0], -1, self.x.shape[1])) out = out.reshape(self.x.shape[0], self.x.shape[1], self.x.shape[2], self.x.shape[3]) out = self.graph_weight(out) @@ -115,4 +118,26 @@ def forward(self, x): final_upsampling = upconv(512, 3, ratio=64).to(self.device) out = final_upsampling.forward(out) - return out \ No newline at end of file + return out + +if __name__ == "__main__": + gc.collect() + torch.cuda.empty_cache() + device = torch.device("cuda") + + temp = Image.open("D:/dataset/gtFine_trainvaltest/gtFine/train/aachen/aachen_000000_000019_gtFine_color.png").convert("RGB") + temp_tf = T.Compose([ + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + temp_img = temp_tf(temp) + temp_img = torch.stack([temp_img, temp_img]).to(device) + + temp_model = SpyGR(device) + temp_model.to(device) + output = temp_model.forward(temp_img) + print("output size: ", output.shape) + + temp_tff = T.ToPILImage() + output = temp_tff(output[0, :, :, :]) + output.show() \ No newline at end of file diff --git a/spygr_dataloader.py b/spygr_dataloader.py index e69de29..30db28c 100644 --- a/spygr_dataloader.py +++ b/spygr_dataloader.py @@ -0,0 +1,63 @@ +import os + +import numpy as np +from PIL import Image +from torch.utils.data import DataLoader, Dataset + +root_dir = "" +class_num = [] + +def data_list(root, quality, mode): + # mode : train / val / test + if quality == "fine": + img_dir_name = "leftImg8bit_trainvaltest" + label_path = os.path.join(root, "grFine_trainvaltest", "gtFine", mode) + label_suffix = 
"_gtFine_labelIds.png" + img_suffix = "_leftImg8bit.png" + img_path = os.path.join(root, img_dir_name, "leftImg8bit", mode) + + items = [] + categories = os.listdir(img_path) + + for c_idx in categories: + c_items = [name.split(img_suffix)[0] for name in os.listdir(os.path.join(img_path, c_idx))] + for item_idx in c_items: + item = (os.path.join(img_path, c_idx, item_idx+img_suffix), os.path.join(label_path, c_idx, item_idx, label_suffix)) + items.append(item) + + return items + +class CityScapesData(Dataset): + def __init__(self, root_dir, quality, mode, transform): + self.quality = quality + self.mode = mode + self.transform = transform + self.img_list = data_list(root_dir, quality, mode) + ignore_label = 255 + self.id_to_trainid = {-1: ignore_label, 0: ignore_label, 1: ignore_label, 2: ignore_label, + 3: ignore_label, 4: ignore_label, 5: ignore_label, 6: ignore_label, + 7: 0, 8: 1, 9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4, + 14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5, + 18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14, + 28: 15, 29: ignore_label, 30: ignore_label, 31: 16, 32: 17, 33: 18} + self.void_classes = [0, 1, 2, 3, 4, 5, 6, 9, 10, 14, 15, 16, 18, 29, 30, -1] + self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33] + + def __len__(self): + return len(self.img_list) + + def __getitem__(self, index): + img_path, lab_path = self.img_list[index] + image = Image.open(img_path).convert("RGB") + label = Image.open(lab_path) + + label = np.array(label) + label_copy = label.copy() + for k, v in self.id_to_trainid.items(): + label_copy[label==k] = v + label = Image.fromarray(label_copy.astype(np.uint8)) + + if self.transform is not None: + image = self.transform(image) + + return image, label \ No newline at end of file diff --git a/spygr_main.py b/spygr_main.py index e69de29..e41c6db 100644 --- a/spygr_main.py +++ b/spygr_main.py @@ -0,0 +1,71 @@ +import os +import numpy as np +import pandas as pd + +import gc +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torch.utils.data import DataLoader +from torch import Tensor + +import torchvision.models as models +import torchvision.transforms as T +from PIL import Image + +import spygr +import spygr_dataloader + +def main(): + transforms_train = T.Compose([ + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + transforms_val = T.Compose([ + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + transforms_test = T.Compose([ + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + root_dir = "D:/dataset" + + train_set = spygr_dataloader(root_dir, "fine", "train", transforms_train) + valid_set = spygr_dataloader(root_dir, "fine", "val", transforms_train) + test_set = spygr_dataloader(root_dir, "fine", "test", transforms_test) + + train_batch_size = 256 + valid_batch_size = 64 + test_batch_size = 32 + + train_loader = DataLoader(train_set, train_batch_size, num_workers=1) + valid_loader = DataLoader(valid_set, valid_batch_size, num_workers=1) + test_loader = DataLoader(test_set, test_batch_size, num_workers=1) + + device = torch.device("cuda") + + model = spygr(device).to(device) + + optimizer = optim.Adam(model.parameters(), lr=1e-3) + criterion = nn.MultiLabelSoftMarginLoss() + + num_epochs = 10 + model.train() + + for epoch in range(num_epochs): + for i, (images, labels) 
in enumerate(train_loader): + optimizer.zero_grad() + + images = images.to(device) + labels = labels.to(device) + + outputs = model(images) + + +if __name__ == "__main__": + main() \ No newline at end of file From dfe87aeb9e3ff00b6c58e33169b0359aef9f66c4 Mon Sep 17 00:00:00 2001 From: SunWng Date: Fri, 27 Aug 2021 19:45:55 +0900 Subject: [PATCH 04/12] train script update --- .vscode/settings.json | 5 ++ spygr.py | 3 +- spygr_dataloader.py | 19 +++++--- spygr_main.py | 104 ++++++++++++++++++++++++++++++++++++------ 4 files changed, 107 insertions(+), 24 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..4672d17 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "python.linting.pylintEnabled": true, + "python.linting.enabled": true, + "python.linting.pycodestyleEnabled": false +} \ No newline at end of file diff --git a/spygr.py b/spygr.py index 5a14392..907e051 100644 --- a/spygr.py +++ b/spygr.py @@ -114,8 +114,7 @@ def forward(self, x): out = x_gr_2 + F.interpolate(x_gr_3, scale_factor=2, mode="nearest") out = x_gr_1 + F.interpolate(out, scale_factor=2, mode="nearest") - - final_upsampling = upconv(512, 3, ratio=64).to(self.device) + final_upsampling = upconv(512, 3, ratio=32).to(self.device) out = final_upsampling.forward(out) return out diff --git a/spygr_dataloader.py b/spygr_dataloader.py index 30db28c..ed0d823 100644 --- a/spygr_dataloader.py +++ b/spygr_dataloader.py @@ -33,16 +33,21 @@ def __init__(self, root_dir, quality, mode, transform): self.mode = mode self.transform = transform self.img_list = data_list(root_dir, quality, mode) - ignore_label = 255 - self.id_to_trainid = {-1: ignore_label, 0: ignore_label, 1: ignore_label, 2: ignore_label, - 3: ignore_label, 4: ignore_label, 5: ignore_label, 6: ignore_label, - 7: 0, 8: 1, 9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4, - 14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5, - 18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14, - 28: 15, 29: ignore_label, 30: ignore_label, 31: 16, 32: 17, 33: 18} + self.ignore_label = 255 + self.id_to_trainid = {-1: self.ignore_label, 0: self.ignore_label, 1: self.ignore_label, 2: self.ignore_label, + 3: self.ignore_label, 4: self.ignore_label, 5: self.ignore_label, 6: self.ignore_label, + 7: 0, 8: 1, 9: self.ignore_label, 10: self.ignore_label, 11: 2, 12: 3, 13: 4, + 14: self.ignore_label, 15: self.ignore_label, 16: self.ignore_label, 17: 5, + 18: self.ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14, + 28: 15, 29: self.ignore_label, 30: self.ignore_label, 31: 16, 32: 17, 33: 18} self.void_classes = [0, 1, 2, 3, 4, 5, 6, 9, 10, 14, 15, 16, 18, 29, 30, -1] self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33] + self.class_names = ['road', 'sidewalk', 'building', 'wall', 'fence', \ + 'pole', 'traffic_light', 'traffic_sign', 'vegetation', 'terrain', \ + 'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train', \ + 'motorcycle', 'bicycle'] + def __len__(self): return len(self.img_list) diff --git a/spygr_main.py b/spygr_main.py index e41c6db..bbca2f6 100644 --- a/spygr_main.py +++ b/spygr_main.py @@ -9,13 +9,55 @@ import torch.optim as optim from torch.utils.data import DataLoader from torch import Tensor - +from torch.backends import cudnn import torchvision.models as models import torchvision.transforms as T from PIL import Image +import tqdm import spygr -import 
spygr_dataloader +from spygr_dataloader import * + +device = torch.device("cuda") + + +def evaluate_model(val_dataloader: CityScapesData, model: spygr, criterion): + + eval_iou_score = 0 + eval_loss = 0 + + for sample_set in tqdm(val_dataloader): + with torch.no_grad(): + eval_images = sample_set["image"].to(device) + eval_labels = sample_set["lable"].to(device) + + eval_outputs = model(eval_images) + model.eval() + eval_loss = criterion(eval_outputs, eval_labels) + eval_outputs = F.softmax(eval_outputs, dim=1) + eval_outputs = torch.argmax(eval_outputs, dim=1) + eval_outputs = eval_outputs.contiguous().view(-1) + eval_labels = eval_labels.contiguous().view(-1) + + iou_per_class = [] + for num_class in range(len(val_dataloader.class_names)): + true_class = (eval_outputs == num_class) + true_label = (eval_labels == num_class) + if true_label.long().sum().item() == 0: + iou_per_class.append(np.nan) + else: + intersect = torch.logical_and( + true_class, true_label).sum().float().item() + union = torch.logical_and( + true_class, true_label).sum().float().item() + + iou = (intersect + 1e-10) / (union + 1e-10) + iou_per_class.append(iou) + eval_iou_score += np.nanmean(iou_per_class) + eval_loss += eval_loss + + return eval_loss, eval_iou_score + def main(): transforms_train = T.Compose([ @@ -33,39 +75,71 @@ def main(): T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) - root_dir = "D:/dataset" + root_dir = "D:/dataset" ### - train_set = spygr_dataloader(root_dir, "fine", "train", transforms_train) - valid_set = spygr_dataloader(root_dir, "fine", "val", transforms_train) - test_set = spygr_dataloader(root_dir, "fine", "test", transforms_test) + train_set = CityScapesData(root_dir, "fine", "train", transforms_train) + valid_set = CityScapesData(root_dir, "fine", "val", transforms_val) + test_set = CityScapesData(root_dir, "fine", "test", transforms_test) - train_batch_size = 256 - valid_batch_size = 64 - test_batch_size = 32 + train_batch_size = 256 # Control by YAML + valid_batch_size = 64 # Control by YAML + test_batch_size = 32 # Control by YAML train_loader = DataLoader(train_set, train_batch_size, num_workers=1) valid_loader = DataLoader(valid_set, valid_batch_size, num_workers=1) test_loader = DataLoader(test_set, test_batch_size, num_workers=1) - device = torch.device("cuda") + # device = torch.device("cuda") model = spygr(device).to(device) - optimizer = optim.Adam(model.parameters(), lr=1e-3) - criterion = nn.MultiLabelSoftMarginLoss() + optimizer = optim.Adam(model.parameters(), lr=1e-3) # Control by YAML + criterion = nn.CrossEntropyLoss(ignore_index=train_set.ignore_label).cuda() - num_epochs = 10 + num_epochs = 80 # Control by YAML model.train() + num_params = sum([np.prod(p.shape) for p in model.parameters()]) + print("Total number of parameters: {}".format(num_params)) + + num_params_update = sum([np.prod(p.shape) + for p in model.parameters() if p.requires_grad]) + print("Total number of learning parameters: {}".format(num_params_update)) + + cudnn.benchmark = True + + global_step = 0 + steps_per_epoch = len(train_loader) for epoch in range(num_epochs): for i, (images, labels) in enumerate(train_loader): optimizer.zero_grad() - + images = images.to(device) labels = labels.to(device) outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + + optimizer.step() + + print("[epoch][s/s_per_e/global_step]: [{}/{}][{}/{}/{}], loss: {:.12f}".format( + epoch+1, num_epochs, i+1, steps_per_epoch, global_step+1, loss)) + + checkpoint = {"global_step": 
global_step,
+                          "model": model.state_dict(),
+                          "optimizer": optimizer.state_dict()}
+
+            torch.save(checkpoint, os.path.join(
+                "dir", "model_name", "-{:07d}.pth".format(global_step)))
+
+            eval_loss, eval_iou_score = evaluate_model(valid_loader, model, criterion)
+            print("Epoch: {}/{} | validation average loss: {:.5f} | evaluation mIoU: {:.5f}".format(epoch, num_epochs, eval_loss/len(valid_loader), eval_iou_score/len(valid_loader)))
+            model.train()
+
+            global_step += 1
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()

From d2c8f49bd60b09e5f89552e1db94c8ad2d361041 Mon Sep 17 00:00:00 2001
From: SunWng
Date: Mon, 30 Aug 2021 22:29:13 +0900
Subject: [PATCH 05/12] train code debug

---
 __pycache__/spygr.cpython-38.pyc            | Bin 0 -> 5128 bytes
 __pycache__/spygr_dataloader.cpython-38.pyc | Bin 0 -> 3220 bytes
 spygr.py                                    |  21 ++++++---
 spygr_dataloader.py                         |  46 +++++++++++++-------
 spygr_main.py                               |  24 +++++-----
 5 files changed, 57 insertions(+), 34 deletions(-)
 create mode 100644 __pycache__/spygr.cpython-38.pyc
 create mode 100644 __pycache__/spygr_dataloader.cpython-38.pyc

diff --git a/__pycache__/spygr.cpython-38.pyc b/__pycache__/spygr.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a70871c8bc02131dd2f67e5d84e8c30bc6a131bc
GIT binary patch
[base85 payloads of the two committed __pycache__/*.pyc binaries omitted: not human-readable]

diff --git a/spygr.py b/spygr.py
index 907e051..bac6a99 100644
--- a/spygr.py
+++ b/spygr.py
@@ -53,12 +53,14 @@ def forward(self):
             diag_sqrt[torch.isnan(diag_sqrt)] = 0
             diag_sqrt[torch.isinf(diag_sqrt)] = 0
             D_sqrt_inv[i, :, :] = torch.diag(diag_sqrt)
-
+        print("NaN Check(D_sqrt_inv): ", torch.isnan(D_sqrt_inv).any())
         I = torch.eye(D_sqrt_inv.shape[1]).to(self.device)
         I = I.repeat(D_sqrt_inv.shape[0], 1, 1)
 
         L_tilde = I - torch.matmul(torch.matmul(D_sqrt_inv, A_tilde), D_sqrt_inv)
+        print("NaN Check(L_tilde): ", torch.isnan(L_tilde).any())
         out = torch.matmul(L_tilde, self.x.reshape(self.x.shape[0], -1, self.x.shape[1]))
+        print("NaN Check(out): ", torch.isnan(out).any())
         out = out.reshape(self.x.shape[0], self.x.shape[1], self.x.shape[2], self.x.shape[3])
         out = self.graph_weight(out)
         out = self.relu(out)
@@ -80,9 +82,10 @@ def forward(self, x):
         return out
 
 class SpyGR(nn.Module):
-    def __init__(self, device):
+    def __init__(self, device, num_class=19):
         super().__init__()
         self.device = device
+        self.num_class = num_class
         self.pretrained_resnet = models.resnet50(pretrained=True).to(self.device)
 
         for param in self.pretrained_resnet.parameters():
@@ -90,7 +93,9 @@ def __init__(self, device):
 
         self.reduce_dim = nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1, bias=False)
         self.down_samp = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
-
+        self.classification = nn.Conv2d(512, self.num_class, kernel_size=1, padding=0)
+        self.relu = nn.ReLU()
+
     def forward(self, x):
         x = self.pretrained_resnet.conv1(x)
         x = self.pretrained_resnet.bn1(x)
@@ -101,7 +106,7 @@ def forward(self, x):
         x = self.pretrained_resnet.layer3(x)
         x = self.pretrained_resnet.layer4(x)
         x = self.reduce_dim(x)
-
+        x = self.relu(x)
         GR_1 = GRModule(x, self.device).to(self.device)
         x = self.down_samp(x)
         GR_2 = GRModule(x, self.device).to(self.device)
@@ -109,12 +114,16 @@ def forward(self, x):
         GR_3 = GRModule(x, self.device).to(self.device)
 
         x_gr_1 = GR_1.forward()
+        x_gr_1 = self.relu(x_gr_1)
         x_gr_2 = GR_2.forward()
+        x_gr_2 = self.relu(x_gr_2)
         x_gr_3 = GR_3.forward()
-
+        x_gr_3 = self.relu(x_gr_3)
+
         out = x_gr_2 + F.interpolate(x_gr_3, scale_factor=2, mode="nearest")
         out = x_gr_1 + F.interpolate(out, scale_factor=2, mode="nearest")
-        final_upsampling = upconv(512, 3,
ratio=32).to(self.device) + out = self.classification(out) + final_upsampling = upconv(self.num_class, self.num_class, ratio=32).to(self.device) out = final_upsampling.forward(out) return out diff --git a/spygr_dataloader.py b/spygr_dataloader.py index ed0d823..b3d660e 100644 --- a/spygr_dataloader.py +++ b/spygr_dataloader.py @@ -1,18 +1,17 @@ import os import numpy as np +import torch from PIL import Image from torch.utils.data import DataLoader, Dataset - -root_dir = "" -class_num = [] +import torchvision.transforms as T def data_list(root, quality, mode): # mode : train / val / test if quality == "fine": img_dir_name = "leftImg8bit_trainvaltest" - label_path = os.path.join(root, "grFine_trainvaltest", "gtFine", mode) - label_suffix = "_gtFine_labelIds.png" + mask_path = os.path.join(root, "gtFine_trainvaltest", "gtFine", mode) + mask_suffix = "_gtFine_labelIds.png" img_suffix = "_leftImg8bit.png" img_path = os.path.join(root, img_dir_name, "leftImg8bit", mode) @@ -22,16 +21,17 @@ def data_list(root, quality, mode): for c_idx in categories: c_items = [name.split(img_suffix)[0] for name in os.listdir(os.path.join(img_path, c_idx))] for item_idx in c_items: - item = (os.path.join(img_path, c_idx, item_idx+img_suffix), os.path.join(label_path, c_idx, item_idx, label_suffix)) + item = (os.path.join(img_path, c_idx, item_idx+img_suffix), os.path.join(mask_path, c_idx, item_idx+mask_suffix)) items.append(item) return items class CityScapesData(Dataset): - def __init__(self, root_dir, quality, mode, transform): + def __init__(self, root_dir, quality, mode, transform, crop_size): self.quality = quality self.mode = mode self.transform = transform + self.crop_size = crop_size self.img_list = data_list(root_dir, quality, mode) self.ignore_label = 255 self.id_to_trainid = {-1: self.ignore_label, 0: self.ignore_label, 1: self.ignore_label, 2: self.ignore_label, @@ -51,18 +51,32 @@ def __init__(self, root_dir, quality, mode, transform): def __len__(self): return len(self.img_list) + def random_crop(self, img, mask): + crop_tf = T.RandomCrop(self.crop_size) + img = crop_tf(img) + mask = crop_tf(mask) + # i, j, h, w = T.RandomCrop.get_params(img, output_size=self.crop_size) + # img = T.RandomCrop(img, i, j, h, w) + # mask = T.RandomCrop(mask, i, j, h, w) + return img, mask + def __getitem__(self, index): - img_path, lab_path = self.img_list[index] - image = Image.open(img_path).convert("RGB") - label = Image.open(lab_path) + img_path, mask_path = self.img_list[index] + img = Image.open(img_path).convert("RGB") + mask = Image.open(mask_path) + img, mask = self.random_crop(img, mask) - label = np.array(label) - label_copy = label.copy() + mask = np.array(mask) + mask_copy = mask.copy() for k, v in self.id_to_trainid.items(): - label_copy[label==k] = v - label = Image.fromarray(label_copy.astype(np.uint8)) + mask_copy[mask==k] = v + # label = Image.fromarray(label_copy.astype(np.uint8)) + # label = np.array(label, dtype=np.float32) + # label = torch.from_numpy(np.array(label_copy, dtype=np.int32)).long() if self.transform is not None: - image = self.transform(image) + img = self.transform(img) + + mask = torch.from_numpy(np.array(mask_copy, dtype=np.int32)).long() - return image, label \ No newline at end of file + return img, mask \ No newline at end of file diff --git a/spygr_main.py b/spygr_main.py index bbca2f6..b208df2 100644 --- a/spygr_main.py +++ b/spygr_main.py @@ -15,13 +15,14 @@ from PIL import Image import tqdm -import spygr +from spygr import * from spygr_dataloader import * -device = 
torch.device("cuda")
+
+from torch.backends import cudnn
+device = torch.device("cuda")
 
-def evaluate_model(val_dataloader: CityScapesData, model: spygr, criterion):
+def evaluate_model(val_dataloader: CityScapesData, model: SpyGR, criterion):
@@ -75,13 +76,13 @@ def main():
         T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
     ])
 
-    root_dir = "D:/dataset" ###
+    root_dir = "D:/dataset" ### Directory of dataset
 
-    train_set = CityScapesData(root_dir, "fine", "train", transforms_train)
-    valid_set = CityScapesData(root_dir, "fine", "val", transforms_val)
-    test_set = CityScapesData(root_dir, "fine", "test", transforms_test)
+    train_set = CityScapesData(root_dir, "fine", "train", transforms_train, (768,768))
+    valid_set = CityScapesData(root_dir, "fine", "val", transforms_val, (768,768))
+    test_set = CityScapesData(root_dir, "fine", "test", transforms_test, (768,768))
 
-    train_batch_size = 256 # Control by YAML
+    train_batch_size = 16 # Control by YAML
     valid_batch_size = 64 # Control by YAML
     test_batch_size = 32 # Control by YAML
@@ -91,9 +92,9 @@ def main():
 
     # device = torch.device("cuda")
 
-    model = spygr(device).to(device)
+    model = SpyGR(device).to(device)
 
-    optimizer = optim.Adam(model.parameters(), lr=1e-3) # Control by YAML
+    optimizer = optim.Adam(model.parameters(), lr=1e-4) # Control by YAML
     criterion = nn.CrossEntropyLoss(ignore_index=train_set.ignore_label).cuda()
 
     num_epochs = 80 # Control by YAML
@@ -132,7 +133,7 @@ def main():
                           "optimizer": optimizer.state_dict()}
 
             torch.save(checkpoint, os.path.join(
-                "dir", "model_name", "-{:07d}.pth".format(global_step)))
+                "D:/model", "spygr", "-{:07d}.pth".format(global_step)))
 
             eval_loss, eval_iou_score = evaluate_model(valid_loader, model, criterion)
             print("Epoch: {}/{} | validation average loss: {:.5f} | evaluation mIoU: {:.5f}".format(epoch, num_epochs, eval_loss/len(valid_loader), eval_iou_score/len(valid_loader)))
             model.train()
 
             global_step += 1
-
 if __name__ == "__main__":
     main()

From 2535b012641eee682b4dd71492976528612f4538 Mon Sep 17 00:00:00 2001
From: SunWng
Date: Mon, 30 Aug 2021 22:36:09 +0900
Subject: [PATCH 06/12] delete debugging code

---
 spygr.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/spygr.py b/spygr.py
index bac6a99..1b74672 100644
--- a/spygr.py
+++ b/spygr.py
@@ -53,14 +53,11 @@ def forward(self):
             diag_sqrt[torch.isnan(diag_sqrt)] = 0
             diag_sqrt[torch.isinf(diag_sqrt)] = 0
             D_sqrt_inv[i, :, :] = torch.diag(diag_sqrt)
-        print("NaN Check(D_sqrt_inv): ", torch.isnan(D_sqrt_inv).any())
         I = torch.eye(D_sqrt_inv.shape[1]).to(self.device)
         I = I.repeat(D_sqrt_inv.shape[0], 1, 1)
 
         L_tilde = I - torch.matmul(torch.matmul(D_sqrt_inv, A_tilde), D_sqrt_inv)
-        print("NaN Check(L_tilde): ", torch.isnan(L_tilde).any())
         out = torch.matmul(L_tilde, self.x.reshape(self.x.shape[0], -1, self.x.shape[1]))
-        print("NaN Check(out): ", torch.isnan(out).any())
         out = out.reshape(self.x.shape[0], self.x.shape[1], self.x.shape[2], self.x.shape[3])
         out = self.graph_weight(out)
         out = self.relu(out)

From fcca632a5229680c3b6ce3b677330a69d4527d53 Mon Sep 17 00:00:00 2001
From: SunWng
Date: Thu, 2 Sep 2021 11:31:18 +0900
Subject: [PATCH 07/12] validation code update

---
 __pycache__/spygr.cpython-38.pyc | Bin 5128 -> 4841 bytes
 spygr_main.py                    |  14 +++++++-------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/__pycache__/spygr.cpython-38.pyc b/__pycache__/spygr.cpython-38.pyc
index a70871c8bc02131dd2f67e5d84e8c30bc6a131bc..13197d6f2685df20bad0d1dba7614cf60905856e 100644
GIT binary patch
[base85 delta payloads (delta 845 / delta 1142) for the committed .pyc omitted: not human-readable]

diff --git a/spygr_main.py b/spygr_main.py
index b208df2..df660f4 100644
--- a/spygr_main.py
+++ b/spygr_main.py
@@ -13,7 +13,7 @@
 import torchvision.models as models
 import torchvision.transforms as T
 from PIL import Image
-import tqdm
+from tqdm import tqdm
 
 from spygr import *
 from spygr_dataloader import *
@@ -22,15 +22,15 @@
 device = torch.device("cuda")
 
-def evaluate_model(val_dataloader: CityScapesData, model: SpyGR, criterion):
+def evaluate_model(val_dataloader, model: SpyGR, criterion):
 
     eval_iou_score = 0
     eval_loss = 0
 
-    for sample_set in tqdm(val_dataloader):
+    for images, masks in tqdm(val_dataloader):
         with torch.no_grad():
-            eval_images = sample_set["image"].to(device)
-            eval_labels = sample_set["lable"].to(device)
+            eval_images = images.to(device)
+            eval_labels = masks.to(device)
 
             eval_outputs = model(eval_images)
             model.eval()
@@ -135,11 +135,11 @@ def main():
             torch.save(checkpoint, os.path.join(
                 "D:/model", "spygr", "-{:07d}.pth".format(global_step)))
 
+            global_step += 1
+
             eval_loss, eval_iou_score = evaluate_model(valid_loader, model, criterion)
             print("Epoch: {}/{} | validation average loss: {:.5f} | evaluation mIoU: {:.5f}".format(epoch, num_epochs, eval_loss/len(valid_loader), eval_iou_score/len(valid_loader)))
             model.train()
 
-            global_step += 1
-
 if __name__ == "__main__":
     main()

From e8d429b8329e84239912cb363aa288f56ac39aa7 Mon Sep 17 00:00:00 2001
From: MKHan91
Date: Sat, 4 Sep 2021 20:15:53 +0900
Subject: [PATCH 08/12] Adding the code: model_just_loaded

---
 msaspp_main.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/msaspp_main.py b/msaspp_main.py
index f2c1732..9121c60 100644
--- a/msaspp_main.py
+++ b/msaspp_main.py
@@ -168,6 +168,7 @@ def main_worker(ngpus_per_node, args):
     global_step = 0
     optimizer = torch.optim.Adam(model.parameters(), weight_decay=args.weight_decay, lr=args.learning_rate)
 
+    model_just_loaded = False
     if args.checkpoint_path != '':
         if os.path.isfile(args.checkpoint_path):
             print("Loading checkpoint '{}'".format(args.checkpoint_path))
@@ -185,6 +186,7 @@ def main_worker(ngpus_per_node, args):
         else:
             print("No checkpoint found at '{}'".format(args.checkpoint_path))
 
+        model_just_loaded = True
     if args.retrain:
         global_step = 0
 
@@ -236,7 +238,7 @@ def main_worker(ngpus_per_node, args):
                 train_loss_list.append(loss)
                 duration += time.time() - befor_op_time
 
-                if global_step and global_step % args.log_freq == 0:
+                if global_step and global_step % args.log_freq == 0 and not model_just_loaded:
                    examples_per_sec =
args.batch_size / duration * args.log_freq duration = 0 time_sofar = (time.time() - start_time) / 3600 @@ -264,7 +266,7 @@ def main_worker(ngpus_per_node, args): 'optimizer': optimizer.state_dict()} torch.save(checkpoint, os.path.join(args.log_directory, args.model_name, 'model', 'model-{:07d}.pth'.format(global_step))) - if global_step and global_step % args.save_freq == 0: + if global_step and global_step % args.save_freq == 0 and not model_just_loaded: if not args.multiprocessing_distributed or (args.multiprocessing_distributed and args.rank % ngpus_per_node == 0): eval_loss_sum, eval_iou_sum_score = evaluate_model(val_dataloader, model, criterion) mIoU = eval_iou_sum_score / len(val_dataloader.data) @@ -294,6 +296,8 @@ def main_worker(ngpus_per_node, args): torch.save(checkpoint, os.path.join(args.log_directory, args.model_name, 'eval_model', 'model-{:07d}_mIoU-{:.3f}.pth'.format(global_step, max(mIoU_list)))) model.train() + + model_just_loaded = False global_step += 1 epoch += 1 From 3386864782a037ff482f2437bdb70a0654d5596e Mon Sep 17 00:00:00 2001 From: MKHan91 Date: Sat, 4 Sep 2021 21:44:00 +0900 Subject: [PATCH 09/12] Fit the code: evaluation code --- msaspp_main.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/msaspp_main.py b/msaspp_main.py index 9121c60..9bc721c 100644 --- a/msaspp_main.py +++ b/msaspp_main.py @@ -112,22 +112,27 @@ def main(): os.system(command) if args.checkpoint_path == '': - aux_out_path = os.path.join(args.log_directory, args.model_name) + model_out_path = os.path.join(args.log_directory, args.model_name, model_filename) - command = 'cp denseASPP.py ' + aux_out_path + command = 'cp denseASPP.py ' + model_out_path os.system(command) + aux_out_path = os.path.join(args.log_directory, args.model_name) + command = 'cp msaspp_main.py ' + aux_out_path os.system(command) command = 'cp msaspp_dataloader.py ' + aux_out_path os.system(command) else: + args.checkpoint_path = os.path.join(args.checkpoint_path, args.model_name, 'model') loaded_model_dir = os.path.dirname(args.checkpoint_path) loaded_model_name = os.path.basename(loaded_model_dir) loaded_model_filename = loaded_model_name + '.py' - model_out_path = os.path.join(args.checkpoint_path, args.model_name, model_filename) + model_out_path = os.path.join(args.log_directory, args.model_name, model_filename) + command = 'cp ' + os.path.join(loaded_model_dir, loaded_model_filename) + ' ' + model_out_path + os.system(command) torch.cuda.empty_cache() args.distributed = args.world_size > 1 or args.multiprocessing_distributed From 2c91573c4a0020fb2dae21e22a52e22a105cf7f2 Mon Sep 17 00:00:00 2001 From: MKHan91 Date: Sun, 5 Sep 2021 18:45:20 +0900 Subject: [PATCH 10/12] commit --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 6678241..b1e58c6 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +<<<<<<< HEAD # Multi-Scale Atrous Spatial Pyramid Pooling for Larger and Denser receptive field in Semantic Segmentation ## Introduction @@ -17,3 +18,7 @@ Our team is still organizing the code, so the readability of the code is low. 
### Predicted image
 ![cityscapes_image](./test_image/Semantic_Segmentation.JPG)
+=======
+# SpyGR-segmentation
+SpyGR code after paper review
+>>>>>>> fcca632a5229680c3b6ce3b677330a69d4527d53

From 9ec15cb651070eaf69ae4cabf7fabc488f84c5be Mon Sep 17 00:00:00 2001
From: MKHan91
Date: Sun, 5 Sep 2021 18:46:32 +0900
Subject: [PATCH 11/12] Fix the code: delete the code

---
 README.md | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/README.md b/README.md
index b1e58c6..6678241 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,3 @@
-<<<<<<< HEAD
 # Multi-Scale Atrous Spatial Pyramid Pooling for Larger and Denser receptive field in Semantic Segmentation
 
 ## Introduction
@@ -18,7 +17,3 @@ Our team is still organizing the code, so the readability of the code is low.
 
 ### Predicted image
 ![cityscapes_image](./test_image/Semantic_Segmentation.JPG)
-=======
-# SpyGR-segmentation
-SpyGR code after paper review
->>>>>>> fcca632a5229680c3b6ce3b677330a69d4527d53

From 81f80e75b1770be18c83e363ebc7505619bfa236 Mon Sep 17 00:00:00 2001
From: MKHan91
Date: Sat, 25 Sep 2021 18:38:56 +0900
Subject: [PATCH 12/12] modifying the code

---
 README.md             |   2 +-
 denseASPP.py          |   1 -
 miscellaneous/misc.py |  53 +++++++++++---
 msaspp_dataloader.py  |  47 ++++++-------
 msaspp_inference.py   |  96 ++++++++++--------------
 msaspp_main.py        |  86 +++++++++++++----------
 msaspp_test.py        | 156 ------------------------------------------
 transforms.py         |  76 ++++++++++++++++++++
 8 files changed, 241 insertions(+), 276 deletions(-)
 delete mode 100644 msaspp_test.py
 create mode 100644 transforms.py

diff --git a/README.md b/README.md
index 6678241..def903e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Multi-Scale Atrous Spatial Pyramid Pooling for Larger and Denser receptive field in Semantic Segmentation
+`# Multi-Scale Atrous Spatial Pyramid Pooling for Larger and Denser receptive field in Semantic Segmentation
 
 ## Introduction
 This code simply upgrades the code from the paper "DenseASPP for Semantic Segmentation in Street Scenes". The paper provides only the proposed model code, with no training code, so our team implemented the training code ourselves. And it is not a plain training implementation: our own contribution is proposed on top of it.
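Before the remaining per-file diffs of this final patch, a minimal self-contained sketch of the densely connected atrous pyramid the README above describes may help orient the reader: each 3x3 branch uses a larger dilation rate than the last and consumes the input concatenated with every earlier branch's output, which is what produces the "larger and denser" receptive field. The class name, channel widths, and dilation rates below are illustrative assumptions for exposition only, not the configuration in this repository's denseASPP.py:

import torch
import torch.nn as nn

class TinyDenseASPP(nn.Module):
    # Dense connectivity: branch i sees the input plus the outputs of
    # branches 0..i-1, so effective receptive fields compound across branches.
    def __init__(self, in_ch=256, mid_ch=64, rates=(3, 6, 12, 18)):
        super().__init__()
        self.branches = nn.ModuleList()
        ch = in_ch
        for r in rates:
            self.branches.append(nn.Sequential(
                nn.Conv2d(ch, mid_ch, kernel_size=3, padding=r, dilation=r, bias=False),
                nn.BatchNorm2d(mid_ch),
                nn.ReLU(inplace=True)))
            ch += mid_ch  # the next branch consumes one more concatenated feature map
        self.project = nn.Conv2d(ch, in_ch, kernel_size=1, bias=False)

    def forward(self, x):
        feats = [x]
        for branch in self.branches:
            feats.append(branch(torch.cat(feats, dim=1)))  # concatenate, then convolve
        return self.project(torch.cat(feats, dim=1))

if __name__ == "__main__":
    # padding == dilation keeps the spatial size, so output shape matches input shape
    print(TinyDenseASPP()(torch.randn(2, 256, 32, 64)).shape)  # torch.Size([2, 256, 32, 64])

Because every branch reuses all earlier features, a few cheap mid-width branches cover a wide, densely sampled range of scales instead of one expensive large-kernel convolution.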
diff --git a/denseASPP.py b/denseASPP.py index 8bf2b1c..9604aac 100644 --- a/denseASPP.py +++ b/denseASPP.py @@ -10,7 +10,6 @@ class DenseASPP(nn.Module): * output_scale can only set as 8 or 16 """ def __init__(self, args, model_cfg, n_class=19, output_stride=8): - # def __init__(self, model_cfg, n_class=19, output_stride=8): super(DenseASPP, self).__init__() # bn_size = model_cfg['bn_size'] bn_size = args.batch_size diff --git a/miscellaneous/misc.py b/miscellaneous/misc.py index 5340b74..b3ac0ca 100755 --- a/miscellaneous/misc.py +++ b/miscellaneous/misc.py @@ -1,14 +1,14 @@ -import os -from math import ceil - +import torch.nn.functional as F import numpy as np import torch -import torch.nn.functional as F -from torch import nn -from torch.autograd import Variable +import math +import os from sklearn.metrics import confusion_matrix -import math +from torch.autograd import Variable +from tqdm import tqdm +from math import ceil +from torch import nn def check_mkdir(dir_name): if not os.path.exists(dir_name): @@ -404,4 +404,41 @@ def wrapper(self, x): outputs_all_scales += outputs return outputs_all_scales - return wrapper \ No newline at end of file + return wrapper + +def evaluate_model(args, val_dataloader, model, criterion): + eval_miou_sum = 0 + # sum_iou_class = 0 + eval_loss_sum = 0 + for sampled_eval in tqdm(val_dataloader.data): + with torch.no_grad(): + eval_image = sampled_eval['image'].cuda(args.gpu, non_blocking=True) + eval_gt = sampled_eval['gt'].cuda(args.gpu, non_blocking=True) + + eval_output = model(eval_image) + model.eval() + eval_loss = criterion(eval_output, eval_gt) + + eval_output = F.softmax(eval_output, dim=1) + eval_output = torch.argmax(eval_output, dim=1) + eval_output = eval_output.contiguous().view(-1) + eval_gt = eval_gt.contiguous().view(-1) + + iou_per_class = [] + for num_class in range(len(val_dataloader.class_names)): + true_class = (eval_output == num_class) + true_label = (eval_gt == num_class) + if true_label.long().sum().item() == 0: + iou_per_class.append(np.nan) + else: + intersect = torch.logical_and(true_class, true_label).sum().float().item() + union = torch.logical_or(true_class, true_label).sum().float().item() + + iou = (intersect + 1e-10) / (union + 1e-10) + iou_per_class.append(iou) + + eval_miou_sum += np.nanmean(iou_per_class) + # sum_iou_class += sum(iou_per_class) + eval_loss_sum += eval_loss + + return eval_loss_sum, eval_miou_sum \ No newline at end of file diff --git a/msaspp_dataloader.py b/msaspp_dataloader.py index 366e473..088c234 100644 --- a/msaspp_dataloader.py +++ b/msaspp_dataloader.py @@ -2,6 +2,7 @@ import random import numpy as np import torch +import torchvision from torchvision.transforms import functional as F from torch.utils.data import Dataset, DataLoader, dataloader @@ -130,6 +131,14 @@ def __getitem__(self, index): gt_copy[gt == key] = value gt = Image.fromarray(gt_copy.astype(np.uint8)) + if self.mode == 'eval': + image = np.array(image, dtype=np.float32) / 255.0 + gt = np.array(gt, dtype=np.float32) + + sample = {'image': image, 'gt': gt} + sample = self.transform(sample) + return data_name, sample + rescaled_image, rescaled_gt = self.resize_random_crop(image, gt, self.args.input_height, self.args.input_width) rescaled_image = np.array(rescaled_image, dtype=np.float32) / 255.0 @@ -139,33 +148,25 @@ def __getitem__(self, index): sample = {'image': image, 'gt': gt} sample = self.transform(sample) - if self.mode == 'eval': - return data_name, sample return sample - # elif self.mode == 'val': - # image = 
np.array(image, dtype=np.float32) / 255.0 - # gt = np.array(gt, dtype=np.float32) - - # sample = {'image': image, 'gt': gt} - # sample = self.transform(sample) - # return sample - - # def rotate_image(self, img, angle, flag=Image.BILINEAR): - # result = img.rotate(angle, resample=flag) - - # return result - def resize_random_crop(self, image, gt, height, width): scaling = random.uniform(0.5, 2.0) scale_w, scale_h = [int(i*scaling) for i in image.size] + resized_image = image.resize((scale_w, scale_h), Image.CUBIC) resized_gt = gt.resize((scale_w, scale_h), Image.NEAREST) - i, j, h, w = transforms.RandomCrop.get_params(resized_image, output_size=(height, width)) - crop_image = F.crop(resized_image, i, j, h, w) - crop_gt = F.crop(resized_gt, i, j, h, w) + x1 = random.randint(0, scale_w - width) + y1 = random.randint(0, scale_h - height) + + crop_image = resized_image.crop((x1, y1, x1 + width, y1 + height)) + crop_gt = resized_gt.crop((x1, y1, x1 + width, y1 + height)) + + # i, j, h, w = transforms.RandomCrop.get_params(resized_image, output_size=(height, width)) + # crop_image = F.crop(resized_image, i, j, h, w) + # crop_gt = F.crop(resized_gt, i, j, h, w) return crop_image, crop_gt @@ -185,13 +186,13 @@ def train_preprocess(self, image, gt): def augment_image(self, image): # gamma augmentation - # gamma = random.uniform(0.5, 2.0) - # image_aug = image ** gamma + gamma = random.uniform(0.9, 1.1) + image_aug = image ** gamma # brightness augmentation - brightness = random.uniform(-10, 10) - image_aug = image * brightness - + brightness = random.uniform(0.9, 1.1) + image_aug = image_aug * brightness + # color augmentation # colors = np.random.uniform(0.9, 1.1, size=3) # white = np.ones((image.shape[0], image.shape[1])) diff --git a/msaspp_inference.py b/msaspp_inference.py index bbf3cae..d6011b6 100644 --- a/msaspp_inference.py +++ b/msaspp_inference.py @@ -14,45 +14,35 @@ from tqdm import tqdm parser = argparse.ArgumentParser(description="msaspp image segmentation inference code") -parser.add_argument('--model_name', type=str, help='model name to be trained', default='denseaspp-v2') -parser.add_argument('--data_path', type=str, help='test data path', default=os.getcwd()) -parser.add_argument('--input_height', type=int, help='input height', default=512) -parser.add_argument('--input_width', type=int, help='input width', default=512) -parser.add_argument('--batch_size', type=int, help='train batch size', default=8) -parser.add_argument('--checkpoint_path', type=str, help='path to a specific checkpoint to load', default=os.path.join(os.getcwd(), 'log')) -parser.add_argument('--num_checkpoint', type=str, help='model to be saved after training', default='model-0029000_mIoU-0.486.pth') -parser.add_argument('--num_seed', type=int, help='random seed number', default=1) -parser.add_argument('--num_threads', type=int, help='number of threads to use for data loading', default=5) -parser.add_argument('--gpu', type=int, help='GPU id to use', default=0) +parser.add_argument('--model_name', type=str, help='model name to be trained', default='denseaspp-v3') +parser.add_argument('--data_path', type=str, help='test data path', default=os.getcwd()) +parser.add_argument('--input_height', type=int, help='input height', default=512) +parser.add_argument('--input_width', type=int, help='input width', default=512) +parser.add_argument('--batch_size', type=int, help='train batch size', default=8) +parser.add_argument('--log_directory', type=str, help='directory to save checkpoints and summaries', 
default=os.path.join(os.getcwd(), 'log')) +parser.add_argument('--num_checkpoint', type=str, help='model to be saved after training', default='model-0029500_mIoU-0.445.pth') +parser.add_argument('--num_seed', type=int, help='random seed number', default=1) +parser.add_argument('--num_threads', type=int, help='number of threads to use for data loading', default=5) +parser.add_argument('--gpu', type=int, help='GPU id to use', default=0) args = parser.parse_args() -def save_prediction(prediction): - command = 'mkdir ' + os.path.join(os.getcwd(), "prediction_image") - os.system(command) - - plt.figure(figsize=(40, 10)) - for i in range(19): - pred = prediction[:, i, :].permute(1, 2, 0) - pred = pred.detach().cpu().numpy() - plt.subplot(1, 19, i+1) - plt.imshow(pred) - plt.axis('off') - - plt.savefig(os.path.join(os.getcwd(), "prediction_image", "pred_imageset.png"), dpi=400, bbox_inches='tight') - print('Saving done') -def cover_colormap(image): +def check_directory(path): + if not os.path.exists(path): + os.mkdir(path) + +def fill_colormap(prediction): COLOR_MAP = [(128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156), (190, 153, 153), (153, 153, 153), - (250, 170, 30), (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), - (255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), (0, 0, 230), (119, 11, 32)] + (250, 170, 30), (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), + (255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), (0, 0, 230), (119, 11, 32)] - image = image.data.cpu().numpy() - # image = image[0, :] - image = image.argmax(axis=1)[0] - row, col = image.shape + prediction = prediction.data.cpu().numpy() + # prediction = prediction[0, :] + prediction = prediction.argmax(axis=1)[0] + row, col = prediction.shape dst = np.zeros((row, col, 3), dtype=np.uint8) for i in range(19): - dst[image == i] = COLOR_MAP[i] + dst[prediction == i] = COLOR_MAP[i] dst = np.array(dst, dtype=np.uint8) dst = cv2.cvtColor(dst, cv2.COLOR_RGB2BGR) @@ -78,46 +68,48 @@ def get_iou(prediction, gt): return iou_per_class def test(args): - dataloader_val = msasppDataLoader(args, mode='val') + dataloader_eval = msasppDataLoader(args, mode='eval') model = DenseASPP(args, model_cfg=DenseASPP121.Model_CFG) torch.cuda.set_device(args.gpu) model = torch.nn.DataParallel(model, device_ids=[args.gpu]) - model_path = os.path.join(args.checkpoint_path, args.model_name, 'eval_model', args.num_checkpoint) + model_path = os.path.join(args.log_directory, args. 
diff --git a/msaspp_main.py b/msaspp_main.py
index ddbfcee..a4f15fb 100644
--- a/msaspp_main.py
+++ b/msaspp_main.py
@@ -21,7 +21,7 @@
 parser = argparse.ArgumentParser(description='Multi-scale ASPP training')

 parser.add_argument('--mode', type=str, help='training and validation mode', default='train')
-parser.add_argument('--model_name', type=str, help='model name to be trained', default='denseaspp-v3')
+parser.add_argument('--model_name', type=str, help='model name to be trained', default='denseaspp-v5.1')

 # Dataset
 parser.add_argument('--data_path', type=str, help='training data path', default=os.getcwd())
@@ -64,10 +64,36 @@ def check_folder(path):
     if not os.path.exists(path):
         os.makedirs(path)

+def fast_hist(pred, gtruth, num_classes):
+    # mask indicates pixels we care about
+    mask = (gtruth >= 0) & (gtruth < num_classes)
+
+    # stretch ground truth labels by num_classes:
+    #   class 0  -> 0
+    #   class 1  -> 19
+    #   class 18 -> 342
+    #
+    # TP at 0 + 0, 1 + 1, 2 + 2 ...
+    #
+    # TP exist where value == num_classes * class_id + class_id
+    # FN = row[class].sum() - TP   (rows are ground truth)
+    # FP = col[class].sum() - TP   (columns are predictions)
+    hist = np.bincount(num_classes * gtruth[mask].astype(int) + pred[mask],
+                       minlength=num_classes ** 2)
+    hist = hist.reshape(num_classes, num_classes)
+    return hist
+
+def eval_metrics(hist):
+    acc = np.diag(hist).sum() / hist.sum()
+    acc_cls = np.diag(hist) / hist.sum(axis=1)
+    acc_cls = np.nanmean(acc_cls)
+    divisor = hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)
+    iu = np.diag(hist) / divisor
+
+    return iu, acc, acc_cls

 def evaluate_model(val_dataloader, model, criterion):
-    eval_miou_sum = 0
-    # sum_iou_class = 0
+    iou_acc = 0
     eval_loss_sum = 0
     for sampled_eval in tqdm(val_dataloader.data):
         with torch.no_grad():
@@ -76,32 +102,25 @@
             eval_output = model(eval_image)
             model.eval()

-            eval_loss = criterion(eval_output, eval_gt)
-
-            eval_output = F.softmax(eval_output, dim=1)
-            eval_output = torch.argmax(eval_output, dim=1)
-            eval_output = eval_output.contiguous().view(-1)
-            eval_gt = eval_gt.contiguous().view(-1)
-
-            iou_per_class = []
-            for num_class in range(len(val_dataloader.class_names)):
-                true_class = (eval_output == num_class)
-                true_label = (eval_gt == num_class)
-                if true_label.long().sum().item() == 0:
-                    iou_per_class.append(np.nan)
-                else:
-                    intersect = torch.logical_and(true_class, true_label).sum().float().item()
-                    union = torch.logical_or(true_class, true_label).sum().float().item()
-
-                    iou = (intersect + 1e-10) / (union + 1e-10)
-                    iou_per_class.append(iou)
-            eval_miou_sum += np.nanmean(iou_per_class)
-            # sum_iou_class += sum(iou_per_class)
+            eval_loss = criterion(eval_output, eval_gt)
             eval_loss_sum += eval_loss

-    return eval_loss_sum, eval_miou_sum
+            eval_output = F.softmax(eval_output, dim=1)
+            eval_output = torch.argmax(eval_output, dim=1)
+
+            eval_output = eval_output.detach().cpu().numpy()
+            eval_gt = eval_gt.detach().cpu().numpy()
+
+            _iou_acc = fast_hist(eval_output.flatten(), eval_gt.flatten(), 19)
+            iou_acc += _iou_acc
+
+    mean_eval_loss = eval_loss_sum / len(val_dataloader.data)
+    iu, acc, acc_cls = eval_metrics(iou_acc)
+    mean_iu = np.nanmean(iu)
+    return mean_iu, acc, acc_cls, mean_eval_loss
+

 def main():
     model_filename = args.model_name + '.py'
     command = 'mkdir ' + os.path.join(args.log_directory, args.model_name)
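To make the bincount trick concrete, here is a tiny worked example with toy arrays and num_classes = 3 rather than 19: each pixel contributes one count at flat index num_classes * gt + pred, so reshaping yields a confusion matrix with ground truth on the rows and predictions on the columns.

    import numpy as np

    gtruth = np.array([0, 1, 2, 2])
    pred = np.array([0, 1, 2, 1])            # the last pixel is misclassified
    hist = np.bincount(3 * gtruth + pred, minlength=9).reshape(3, 3)
    # hist == [[1, 0, 0],
    #          [0, 1, 0],
    #          [0, 1, 1]]   row 2: one TP on the diagonal, one FN predicted as class 1
    iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
    # iu == [1.0, 0.5, 0.5], so mIoU = np.nanmean(iu) ~ 0.667

Accumulating hist over all validation images and applying eval_metrics at the end gives dataset-level IoU, rather than an average of noisy per-image IoUs.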
@@ -250,8 +269,7 @@ def main_worker(ngpus_per_node, args):
                     duration = 0
                     time_sofar = (time.time() - start_time) / 3600
                     training_time_left = (num_total_steps / global_step - 1.0) * time_sofar
-                    # if not args.multiprocessing_distributed or (args.multiprocessing_distributed and args.rank % ngpus_per_node == 0):
-                    #     print("{}".format(args.model_name))
+
                     print_string = 'GPU: {} | examples/s: {:4.2f} | Average train loss: {:.5f} | time elapsed: {:.2f}h | time left: {:.2f}h'
                     print(print_string.format(args.gpu, examples_per_sec, sum(train_loss_list)/len(train_loss_list), time_sofar, training_time_left))

@@ -275,15 +293,13 @@ def main_worker(ngpus_per_node, args):

                 if global_step and global_step % args.save_freq == 0 and not model_just_loaded:
                     if not args.multiprocessing_distributed or (args.multiprocessing_distributed and args.rank % ngpus_per_node == 0):
-                        eval_loss_sum, eval_miou_sum = evaluate_model(val_dataloader, model, criterion)
-                        # eval_loss_sum, sum_iou_class = evaluate_model(val_dataloader, model, criterion)
-                        mIoU = eval_miou_sum / len(val_dataloader.data)
-                        mIoU_list.append(mIoU)
+                        mean_iu, acc, acc_cls, mean_eval_loss = evaluate_model(val_dataloader, model, criterion)
+                        mIoU_list.append(mean_iu)
                         idx_max = mIoU_list.index(max(mIoU_list))

                         if idx_max == 0 and idx_max == (global_step // args.save_freq - 1):
-                            print_string = 'GPU: {} | Epoch: {}/{} | Average train loss: {:.5f} | Average validation loss: {:.5f} | evaluation mIoU: {:.5f}'
-                            print(print_string.format(args.gpu, epoch, args.num_epochs, sum(train_loss_list)/len(train_loss_list), eval_loss_sum/len(val_dataloader.data), mIoU_list[idx_max]))
+                            print_string = 'GPU: {} | Epoch: {}/{} | Average train loss: {:.5f} | Average validation loss: {:.5f} | evaluation mIoU: {:.5f} | evaluation acc: {:.5f} | evaluation acc cls: {:.5f}'
+                            print(print_string.format(args.gpu, epoch, args.num_epochs, sum(train_loss_list)/len(train_loss_list), mean_eval_loss, mIoU_list[idx_max], acc, acc_cls))

                             checkpoint = {'global_step': global_step,
                                           'model': model.state_dict(),
                                           'optimizer': optimizer.state_dict(),
@@ -295,8 +311,8 @@
                             pass

                         elif idx_max != 0 and idx_max == (global_step // args.save_freq - 1):
-                            print_string = 'GPU: {} | Epoch: {}/{} | Average train loss: {:.5f} | Average validation loss: {:.5f} | evaluation mIoU: {:.5f}'
-                            print(print_string.format(args.gpu, epoch, args.num_epochs, sum(train_loss_list)/len(train_loss_list), eval_loss_sum/len(val_dataloader.data), max(mIoU_list)))
+                            print_string = 'GPU: {} | Epoch: {}/{} | Average train loss: {:.5f} | Average validation loss: {:.5f} | evaluation mIoU: {:.5f} | evaluation acc: {:.5f} | evaluation acc cls: {:.5f}'
+                            print(print_string.format(args.gpu, epoch, args.num_epochs, sum(train_loss_list)/len(train_loss_list), mean_eval_loss, max(mIoU_list), acc, acc_cls))

                             checkpoint = {'global_step': global_step,
                                           'model': model.state_dict(),
                                           'optimizer': optimizer.state_dict(),
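Both branches above reduce to the same question, "is the newest evaluation the best so far?": idx_max == (global_step // args.save_freq - 1) holds exactly when the score just appended is the list's maximum. A simpler equivalent sketch, with illustrative names that are not part of the patch:

    best_miou = float('-inf')           # tracked across evaluations

    def is_new_best(mean_iu):
        global best_miou
        if mean_iu > best_miou:
            best_miou = mean_iu
            return True                 # print the summary and save the checkpoint
        return False

Tracking a single running maximum avoids the index arithmetic and the duplicated print/save blocks.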
diff --git a/msaspp_test.py b/msaspp_test.py
deleted file mode 100644
index 1a26b3a..0000000
--- a/msaspp_test.py
+++ /dev/null
@@ -1,156 +0,0 @@
-
-import torch.nn.functional as F
-import matplotlib.pyplot as plt
-import numpy as np
-import argparse
-import torch
-import cv2
-import os
-
-import addToConfusionMatrix
-from msaspp_dataloader import msasppDataLoader
-from cityscapesLabels import labels, id2label
-from denseASPP import DenseASPP
-from cfgs import DenseASPP121
-from tqdm import tqdm
-
-parser = argparse.ArgumentParser(description="msaspp image segmentation inference code")
-parser.add_argument('--model_name', type=str, help='model name to be trained', default='denseaspp-v2')
-parser.add_argument('--data_path', type=str, help='test data path', default=os.getcwd())
-parser.add_argument('--input_height', type=int, help='input height', default=512)
-parser.add_argument('--input_width', type=int, help='input width', default=512)
-parser.add_argument('--batch_size', type=int, help='train batch size', default=8)
-parser.add_argument('--checkpoint_path', type=str, help='path to a specific checkpoint to load', default=os.path.join(os.getcwd(), 'log'))
-parser.add_argument('--num_checkpoint', type=str, help='model to be saved after training', default='model-0029000_mIoU-0.486.pth')
-parser.add_argument('--num_seed', type=int, help='random seed number', default=1)
-parser.add_argument('--num_threads', type=int, help='number of threads to use for data loading', default=5)
-parser.add_argument('--gpu', type=int, help='GPU id to use', default=0)
-args = parser.parse_args()
-
-def save_prediction(prediction):
-    command = 'mkdir ' + os.path.join(os.getcwd(), "prediction_image")
-    os.system(command)
-
-    plt.figure(figsize=(40, 10))
-    for i in range(19):
-        pred = prediction[:, i, :].permute(1, 2, 0)
-        pred = pred.detach().cpu().numpy()
-        plt.subplot(1, 19, i+1)
-        plt.imshow(pred)
-        plt.axis('off')
-
-    plt.savefig(os.path.join(os.getcwd(), "prediction_image", "pred_imageset.png"), dpi=400, bbox_inches='tight')
-    print('Saving done')
-
-def cover_colormap(image):
-    COLOR_MAP = [(128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156), (190, 153, 153), (153, 153, 153),
-                 (250, 170, 30), (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60),
-                 (255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), (0, 0, 230), (119, 11, 32)]
-
-    image = image.data.cpu().numpy()
-    # image = image[0, :]
-    image = image.argmax(axis=1)[0]
-    row, col = image.shape
-    dst = np.zeros((row, col, 3), dtype=np.uint8)
-    for i in range(19):
-        dst[image == i] = COLOR_MAP[i]
-    dst = np.array(dst, dtype=np.uint8)
-    dst = cv2.cvtColor(dst, cv2.COLOR_RGB2BGR)
-
-    return dst
-
-def get_iou(prediction, gt):
-    prediction = torch.argmax(prediction, dim=1)
-    prediction = prediction.contiguous().view(-1)
-    gt = gt.contiguous().view(-1)
-    iou_per_class = []
-    for num_class in range(19):
-        true_class = (prediction == num_class)
-        true_label = (gt == num_class)
-        if true_label.long().sum().item() == 0:
-            iou_per_class.append(np.nan)
-        else:
-            intersect = torch.logical_and(true_class, true_label).sum().float().item()
-            union = torch.logical_or(true_class, true_label).sum().float().item()
-
-            iou = (intersect + 1e-10) / (union + 1e-10)
-            iou_per_class.append(iou)
-
-    return iou_per_class
-
-def generateMatrix(args):
-
-    args.evalLabels = []
-    for label in labels:
-        if (label.id < 0):
-            continue
-        # we append all found labels, regardless of being ignored
-        args.evalLabels.append(label.id)
-    maxId = max(args.evalLabels)
-    # We use longlong type to be sure that there are no overflows
-    return np.zeros(shape=(maxId+1, maxId+1), dtype=np.ulonglong)
-
-def getIouScoreForLabel(label, confMatrix, args):
-    if id2label[label].ignoreInEval:
-        return float('nan')
-
-    # the number of true positive pixels for this label
-    # the entry on the diagonal of the confusion matrix
-    tp = np.longlong(confMatrix[label, label])
-
-    # the number of false negative pixels for this label
-    # the row sum of the matching row in the confusion matrix
-    # minus the diagonal entry
-    fn = np.longlong(confMatrix[label, :].sum()) - tp
-
-    # the number of false positive pixels for this labels
-    # Only pixels that are not on a pixel with ground truth label that is ignored
-    # The column sum of the corresponding column in the confusion matrix
-    # without the ignored rows and without the actual label of interest
-    notIgnored = [l for l in args.evalLabels if not id2label[l].ignoreInEval and not l == label]
-    fp = np.longlong(confMatrix[notIgnored, label].sum())
-
-    # the denominator of the IOU score
-    denom = (tp + fp + fn)
-    if denom == 0:
-        return float('nan')
-
-    # return IOU
-    return float(tp) / denom
-
-def test(args):
-    confMatrix = generateMatrix(args)
-    classScoreList = {}
-    for label in args.evalLabels:
-        labelName = id2label[label].name
-        classScoreList[labelName] = getIouScoreForLabel(label, confMatrix, args)
-
-    dataloader_test = msasppDataLoader(args, mode='test')
-
-    model = DenseASPP(args, model_cfg=DenseASPP121.Model_CFG)
-    torch.cuda.set_device(args.gpu)
-    model = torch.nn.DataParallel(model, device_ids=[args.gpu])
-
-    model_path = os.path.join(args.checkpoint_path, args.model_name, 'eval_model', args.num_checkpoint)
-    checkpoint = torch.load(model_path)
-    model.load_state_dict(checkpoint['model'])
-    model.eval()
-    model.cuda(args.gpu)
-
-    with torch.no_grad():
-        for data_name, sample in dataloader_test.data:
-            image = sample['image'].cuda(args.gpu)
-            gt = sample['gt'].cuda(args.gpu)
-
-            prediction = model(image)
-
-            prediction = F.softmax(prediction, dim=1)
-            confMatrix = addToConfusionMatrix.cEvaluatePair(predictionNp, groundTruthNp, confMatrix, args.evalLabels)
-            iou_per_class = get_iou(prediction, gt)
-            color_prediction = cover_colormap(prediction)
-
-            plt.imshow(color_prediction)
-            plt.savefig('{}_color.png'.format(*data_name))
-            a=1
-
-if __name__ == "__main__":
-    test(args)
\ No newline at end of file
diff --git a/transforms.py b/transforms.py
new file mode 100644
index 0000000..474a2a7
--- /dev/null
+++ b/transforms.py
@@ -0,0 +1,76 @@
+import random
+
+import numpy as np
+from skimage.filters import gaussian
+import torch
+from PIL import Image, ImageFilter
+
+
+class Compose(object):
+    def __init__(self, transform):
+        self.transform = transform
+
+    def __call__(self, mask):
+        for t in self.transform:
+            mask = t(mask)
+        return mask
+
+
+class RandomVerticalFlip(object):
+    def __call__(self, img):
+        if random.random() < 0.5:
+            return img.transpose(Image.FLIP_TOP_BOTTOM)
+        return img
+
+
+class DeNormalize(object):
+    def __init__(self, mean, std):
+        self.mean = mean
+        self.std = std
+
+    def __call__(self, tensor):
+        for t, m, s in zip(tensor, self.mean, self.std):
+            t.mul_(s).add_(m)
+        return tensor
+
+
+class MaskToTensor(object):
+    def __call__(self, img):
+        return torch.from_numpy(np.array(img, dtype=np.int32)).long()
+
+
+class FreeScale(object):
+    def __init__(self, size, interpolation=Image.BILINEAR):
+        self.size = tuple(reversed(size))  # size: (h, w)
+        self.interpolation = interpolation
+
+    def __call__(self, img):
+        return img.resize(self.size, self.interpolation)
+
+
+class FlipChannels(object):
+    def __call__(self, img):
+        img = np.array(img)[:, :, ::-1]
+        return Image.fromarray(img.astype(np.uint8))
+
+
+class RandomGaussianBlur(object):
+    def __call__(self, img):
+        sigma = 0.15 + random.random() * 1.15
+        blurred_img = gaussian(np.array(img), sigma=sigma, multichannel=True)
+        blurred_img *= 255
+        return Image.fromarray(blurred_img.astype(np.uint8))
\ No newline at end of file
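A hedged usage sketch for the new transforms module; the sizes, dummy arrays, and pipeline composition below are illustrative, not part of the patch. Compose chains any callables, FreeScale resizes with a chosen PIL interpolation, and MaskToTensor preserves integer label ids by converting through int32 to a LongTensor.

    import numpy as np
    from PIL import Image

    from transforms import Compose, FreeScale, MaskToTensor, RandomGaussianBlur

    image = Image.fromarray(np.zeros((256, 512, 3), dtype=np.uint8))  # dummy RGB input
    mask = Image.fromarray(np.zeros((256, 512), dtype=np.uint8))      # dummy label map

    image_transform = Compose([FreeScale((128, 256)), RandomGaussianBlur()])
    mask_transform = Compose([FreeScale((128, 256), interpolation=Image.NEAREST), MaskToTensor()])

    image = image_transform(image)  # PIL image, resized then blurred
    mask = mask_transform(mask)     # torch.LongTensor of class ids, shape (128, 256)

Keeping two parallel pipelines matters here: photometric transforms such as RandomGaussianBlur apply only to the image, while the mask must use nearest-neighbor resizing so interpolation never invents fractional class ids.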