Use black and isort formatters
adursun committed Sep 21, 2019
1 parent a47302e · commit 706a633
Showing 2 changed files with 97 additions and 50 deletions.
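The reformatting in this commit can presumably be reproduced with the tool versions pinned in requirements.txt (default settings assumed for both tools; the exact commands are an inference, not part of the commit):

    pip install black==19.3b0 isort==4.3.21
    isort wsddn-pytorch.py    # groups and alphabetizes imports
    black wsddn-pytorch.py    # rewraps code to 88-character lines with trailing commas

isort's defaults put plain `import x` statements before `from x import y` within each section, which matches the new import order in the diff below.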
requirements.txt (2 changes: 2 additions & 0 deletions)
@@ -4,3 +4,5 @@ torchvision==0.4.0
 scipy==1.3.1
 chainercv==0.13.1
 jupyter==1.0.0
+black==19.3b0
+isort==4.3.21
wsddn-pytorch.py (145 changes: 95 additions & 50 deletions)
@@ -1,34 +1,25 @@
 import os
 import random
-
-from datetime import datetime
-
-from PIL import Image
-
 import xml.etree.ElementTree as ET
+from datetime import datetime
 
+import chainercv.transforms as T
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torch.nn.functional as F
-
+import torchvision
+from chainercv.evaluations import eval_detection_voc
+from chainercv.visualizations import vis_bbox
+from IPython.display import display
+from PIL import Image
 from scipy.io import loadmat
-
 from torch import nn, optim
-from torch.utils.data import Dataset, DataLoader
 from torch.optim.lr_scheduler import MultiStepLR
-
-from IPython.display import display
-
-import torchvision
+from torch.utils.data import DataLoader, Dataset
 from torchvision import transforms
 from torchvision.models import alexnet
-from torchvision.ops import roi_pool, nms
-
-from chainercv.evaluations import eval_detection_voc
-import chainercv.transforms as T
-from chainercv.visualizations import vis_bbox
-
+from torchvision.ops import nms, roi_pool
 
 ### Set the seed
 
@@ -55,14 +46,17 @@
 OFFSET = 0
 
 SCALES = [480, 576, 688, 864, 1200]
-TRANSFORMS = transforms.Compose([
-    transforms.ToTensor(),
-    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-])
+TRANSFORMS = transforms.Compose(
+    [
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ]
+)
 
 
 ### Utils
 
+
 def hflip(img, boxes, gt_boxes=None):
     img = T.flip(img, y_flip=False, x_flip=True)
     boxes = T.flip_bbox(boxes, img[0].shape, y_flip=False, x_flip=True)
@@ -83,7 +77,7 @@ def scale(img, boxes, max_dim, gt_boxes=None):
 
 
 def swap_axes(boxes):
-    boxes = np.stack((boxes[:,1], boxes[:,0], boxes[:,3], boxes[:,2]), axis=1)
+    boxes = np.stack((boxes[:, 1], boxes[:, 0], boxes[:, 3], boxes[:, 2]), axis=1)
     return boxes
 
 
@@ -94,6 +88,7 @@ def np2gpu(arr):
 
 ### Create dataset and data loader
 
+
 class VOCandMCG(Dataset):
 
     CLASS2ID = {
@@ -122,14 +117,16 @@ class VOCandMCG(Dataset):
     def __init__(self, split):
         self.split = split
 
-        #loaded_mat = loadmat(f"/kaggle/input/selective-search-windows/selective_search_data/voc_2007_{self.split}.mat")
+        # loaded_mat = loadmat(f"/kaggle/input/selective-search-windows/selective_search_data/voc_2007_{self.split}.mat")
         loaded_mat = loadmat(f"data/selective_search_data/voc_2007_{self.split}.mat")
         self.ssw_boxes = loaded_mat["boxes"][0]
         self.ssw_scores = loaded_mat["boxScores"][0]
 
-        #voc_dir = f"/kaggle/input/pascal-voc/voc{self.split}_06-nov-2007/VOCdevkit/VOC2007"
+        # voc_dir = f"/kaggle/input/pascal-voc/voc{self.split}_06-nov-2007/VOCdevkit/VOC2007"
         voc_dir = f"data/VOC{self.split}_06-Nov-2007/VOCdevkit/VOC2007"
-        self.ids = [id_.strip() for id_ in open(f"{voc_dir}/ImageSets/Main/{self.split}.txt")]
+        self.ids = [
+            id_.strip() for id_ in open(f"{voc_dir}/ImageSets/Main/{self.split}.txt")
+        ]
         self.img_paths = [f"{voc_dir}/JPEGImages/{id_}.jpg" for id_ in self.ids]
         self.annotation_paths = [f"{voc_dir}/Annotations/{id_}.xml" for id_ in self.ids]
 
@@ -165,7 +162,12 @@ def _get_annotations(self, i):
         for obj in xml.findall("object"):
             if obj.find("difficult").text == "0":
                 bndbox = obj.find("bndbox")
-                boxes.append([int(bndbox.find(tag).text) - 1 for tag in ("xmin", "ymin", "xmax", "ymax")])
+                boxes.append(
+                    [
+                        int(bndbox.find(tag).text) - 1
+                        for tag in ("xmin", "ymin", "xmax", "ymax")
+                    ]
+                )
                 labels.append(self.CLASS2ID[obj.find("name").text])
 
         boxes = np.stack(boxes).astype(np.float32)
@@ -174,8 +176,8 @@ def _get_annotations(self, i):
 
     @staticmethod
     def prepare(img, boxes, max_dim=None, xflip=False, gt_boxes=None):
-        img = np.asarray(img, dtype=np.float32) # use numpy array for augmentation
-        img = np.transpose(img, (2, 0, 1)) # convert img to CHW
+        img = np.asarray(img, dtype=np.float32)  # use numpy array for augmentation
+        img = np.transpose(img, (2, 0, 1))  # convert img to CHW
 
         # convert boxes into (ymin, xmin, ymax, xmax) format
         boxes = swap_axes(boxes)
@@ -199,20 +201,22 @@ def prepare(img, boxes, max_dim=None, xflip=False, gt_boxes=None):
 
         # convert img from CHW to HWC
         img = Image.fromarray(np.transpose(img, (1, 2, 0)).astype(np.uint8), mode="RGB")
-        img = TRANSFORMS(img) # convert pillow image to normalized tensor
+        img = TRANSFORMS(img)  # convert pillow image to normalized tensor
 
         return img, boxes, gt_boxes
 
     def __getitem__(self, i):
-        img = Image.open(self.img_paths[i]).convert("RGB") # open Pillow image
+        img = Image.open(self.img_paths[i]).convert("RGB")  # open Pillow image
 
         boxes, scores = self.get_boxes_and_scores(i)
         gt_boxes, gt_labels = self._get_annotations(i)
 
         if self.split == "test":
             return img, boxes, scores, gt_boxes, gt_labels
 
-        img, boxes, _ = self.prepare(img, boxes, random.choice(SCALES), random.choice([False, True]))
+        img, boxes, _ = self.prepare(
+            img, boxes, random.choice(SCALES), random.choice([False, True])
+        )
         target = self.get_target(gt_labels)
         return img, boxes, scores, target
 
@@ -229,6 +233,7 @@ def __len__(self):
 
 ### Create the network
 
+
 class WSDDN(nn.Module):
     base = alexnet(pretrained=False)
 
@@ -244,11 +249,11 @@ def forward(self, batch_imgs, batch_boxes, batch_scores):
         # assume batch size is 1
         batch_boxes = [batch_boxes[0]]
 
-        out = self.features(batch_imgs) # [1, 256, 21, 29]
-        out = roi_pool(out, batch_boxes, (6, 6), 1.0/16) # [4000, 256, 6, 6]
+        out = self.features(batch_imgs)  # [1, 256, 21, 29]
+        out = roi_pool(out, batch_boxes, (6, 6), 1.0 / 16)  # [4000, 256, 6, 6]
 
-        out = out.view(-1, 9216) # [4000, 9216]
-        out = self.fcs(out) # [4000, 4096]
+        out = out.view(-1, 9216)  # [4000, 9216]
+        out = self.fcs(out)  # [4000, 4096]
 
         classification_scores = F.softmax(self.fc_c(out), dim=1)
         detection_scores = F.softmax(self.fc_d(out), dim=0)
@@ -291,7 +296,12 @@ def loss_func(combined_scores, target):
 for (batch_imgs, batch_boxes, batch_scores, batch_target) in train_dl:
     optimizer.zero_grad()
 
-    batch_imgs, batch_boxes, batch_scores, batch_target = batch_imgs.to(DEVICE), batch_boxes.to(DEVICE), batch_scores.to(DEVICE), batch_target.to(DEVICE)
+    batch_imgs, batch_boxes, batch_scores, batch_target = (
+        batch_imgs.to(DEVICE),
+        batch_boxes.to(DEVICE),
+        batch_scores.to(DEVICE),
+        batch_target.to(DEVICE),
+    )
     combined_scores, _ = net(batch_imgs, batch_boxes, batch_scores)
 
     loss = loss_func(combined_scores, batch_target[0])
@@ -323,11 +333,26 @@ def loss_func(combined_scores, target):
 total_gt_labels = []
 
 for (img, boxes, scores, gt_boxes, gt_labels) in test_dl:
-    boxes, scores, gt_boxes, gt_labels = boxes.numpy(), scores.numpy(), gt_boxes.numpy(), gt_labels.numpy()
-    p_img, p_boxes, p_gt_boxes = VOCandMCG.prepare(img, boxes, max_dim, xflip, gt_boxes)
-
-    batch_imgs, batch_boxes, batch_scores, batch_gt_boxes, batch_gt_labels = np2gpu(p_img), np2gpu(p_boxes), np2gpu(scores), np2gpu(p_gt_boxes), np2gpu(gt_labels)
-    combined_scores, pred_boxes = net(batch_imgs, batch_boxes, batch_scores)
+    boxes, scores, gt_boxes, gt_labels = (
+        boxes.numpy(),
+        scores.numpy(),
+        gt_boxes.numpy(),
+        gt_labels.numpy(),
+    )
+    p_img, p_boxes, p_gt_boxes = VOCandMCG.prepare(
+        img, boxes, max_dim, xflip, gt_boxes
+    )
+
+    batch_imgs, batch_boxes, batch_scores, batch_gt_boxes, batch_gt_labels = (
+        np2gpu(p_img),
+        np2gpu(p_boxes),
+        np2gpu(scores),
+        np2gpu(p_gt_boxes),
+        np2gpu(gt_labels),
+    )
+    combined_scores, pred_boxes = net(
+        batch_imgs, batch_boxes, batch_scores
+    )
     pred_scores, pred_labels = torch.max(combined_scores, dim=1)
 
     batch_pred_boxes = []
@@ -339,17 +364,37 @@ def loss_func(combined_scores, target):
 
         selected_indices = nms(pred_boxes, region_scores, 0.4)
 
-        batch_pred_boxes.append(pred_boxes[selected_indices].cpu().numpy())
-        batch_pred_scores.append(region_scores[selected_indices].cpu().numpy())
-        batch_pred_labels.append(np.full(len(selected_indices), i, dtype=np.int32))
-
-    total_pred_boxes.append(np.concatenate(batch_pred_boxes, axis=0))
-    total_pred_scores.append(np.concatenate(batch_pred_scores, axis=0))
-    total_pred_labels.append(np.concatenate(batch_pred_labels, axis=0))
+        batch_pred_boxes.append(
+            pred_boxes[selected_indices].cpu().numpy()
+        )
+        batch_pred_scores.append(
+            region_scores[selected_indices].cpu().numpy()
+        )
+        batch_pred_labels.append(
+            np.full(len(selected_indices), i, dtype=np.int32)
+        )
+
+    total_pred_boxes.append(
+        np.concatenate(batch_pred_boxes, axis=0)
+    )
+    total_pred_scores.append(
+        np.concatenate(batch_pred_scores, axis=0)
+    )
+    total_pred_labels.append(
+        np.concatenate(batch_pred_labels, axis=0)
+    )
     total_gt_boxes.append(batch_gt_boxes[0].cpu().numpy())
     total_gt_labels.append(batch_gt_labels[0].cpu().numpy())
 
-result = eval_detection_voc(total_pred_boxes, total_pred_labels, total_pred_scores, total_gt_boxes, total_gt_labels, iou_thresh=0.5, use_07_metric=True)
+result = eval_detection_voc(
+    total_pred_boxes,
+    total_pred_labels,
+    total_pred_scores,
+    total_gt_boxes,
+    total_gt_labels,
+    iou_thresh=0.5,
+    use_07_metric=True,
+)
 aps.append(result["ap"])
 maps.append(result["map"])

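Out of the box, black and isort can disagree about how long import lines get wrapped, so repositories adopting both often add a black-compatible isort configuration. A sketch for the isort 4.x pinned here (an assumption for illustration; no such config file is part of this commit) would go in setup.cfg:

    [isort]
    # match black's default 88-character line length
    line_length = 88
    # wrap with black-style parenthesized, trailing-comma vertical indents
    multi_line_output = 3
    include_trailing_comma = True
    force_grid_wrap = 0
    use_parentheses = True

With the two tools agreeing, `black --check wsddn-pytorch.py` and `isort --check-only wsddn-pytorch.py` can then guard the formatting in CI.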