
Commit e6fc9a7

Merge branch 'master' of github.com:adursun/wsddn.pytorch

2 parents: c853974 + 0c45e83

File tree: 9 files changed, +46 -153 lines changed

Dockerfile

Lines changed: 0 additions & 1 deletion

```diff
@@ -3,7 +3,6 @@ FROM nvidia/cuda:10.0-devel-ubuntu18.04
 WORKDIR /ws
 
 COPY requirements.txt /ws/
-COPY install_detectron2.sh /ws/
 
 RUN apt update && apt install -y apt-utils git vim libsm6 libxext6 libxrender-dev python3 python3-dev python3-pip
 RUN pip3 install -r requirements.txt
```

README.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -9,7 +9,7 @@
 
 ```
 ./prepare.sh
-docker run --gpus all -v `pwd`:/ws -it wsddn.pytorch /bin/bash
+docker run --gpus all --ipc=host -v `pwd`:/ws -it wsddn.pytorch /bin/bash
 ```
 
 ## Jupyter
````
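The new `--ipc=host` flag is worth a word: PyTorch `DataLoader` workers exchange tensors through shared memory, and a container's default `/dev/shm` is typically only 64 MB, so sharing the host IPC namespace (or, alternatively, passing a larger `--shm-size`) keeps multi-worker loading from crashing. This matches the bump to `num_workers=4` elsewhere in this commit.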

install_detectron2.sh

Lines changed: 0 additions & 8 deletions
This file was deleted.

prepare.sh

Lines changed: 3 additions & 3 deletions

```diff
@@ -8,9 +8,9 @@ wget http://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar -P data/
 mkdir data/VOCtest_06-Nov-2007
 tar xf data/VOCtest_06-Nov-2007.tar --directory data/VOCtest_06-Nov-2007/
 
-# download and extract selective search windows boxes
-wget http://www.cs.cmu.edu/~spurushw/hw2_files/selective_search_data.tar -P data/
-tar xf data/selective_search_data.tar --directory data/
+# download edgeboxes proposals
+wget https://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/EdgeBoxesVOC2007test.mat -P data/
+wget https://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/EdgeBoxesVOC2007trainval.mat -P data/
 
 # download pretrained alexnet weights
 wget https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth -P states/
```

requirements.txt

Lines changed: 1 addition & 0 deletions

```diff
@@ -8,3 +8,4 @@ black==19.3b0
 isort==4.3.21
 albumentations==0.4.3
 Cython==0.29.14
+rope==0.14.0
```

src/datasets.py

Lines changed: 11 additions & 12 deletions

```diff
@@ -7,10 +7,10 @@
 from scipy.io import loadmat
 from torch.utils.data import Dataset
 
-from utils import TRANSFORMS, prepare, swap_axes
+from utils import TRANSFORMS, filter_small_boxes, prepare, swap_axes
 
 
-class VOCandSSW(Dataset):
+class VocAndEb(Dataset):
 
     CLASS2ID = {
         "aeroplane": 0,
@@ -36,36 +36,35 @@ class VOCandSSW(Dataset):
     }
 
     def __init__(self, split, scales):
-        assert split in ["trainval", "test"], "`split` should be in [train, test]"
+        assert split in ["trainval", "test"], "`split` should be in [trainval, test]"
 
         self.split = split
         self.scales = scales
 
-        loaded_mat = loadmat(f"../data/selective_search_data/voc_2007_{self.split}.mat")
-        self.ssw_boxes = loaded_mat["boxes"][0]
-        self.ssw_scores = loaded_mat["boxScores"][0]
+        loaded_mat = loadmat(f"../data/EdgeBoxesVOC2007{self.split}.mat")
+        self.eb_boxes = loaded_mat["boxes"][0]
+        self.eb_scores = loaded_mat["boxScores"][0]
+        self.ids = [str(id_[0]) for id_ in loaded_mat["images"][0]]
 
         voc_dir = f"../data/VOC{self.split}_06-Nov-2007/VOCdevkit/VOC2007"
-        self.ids = [
-            id_.strip() for id_ in open(f"{voc_dir}/ImageSets/Main/{self.split}.txt")
-        ]
         self.img_paths = [f"{voc_dir}/JPEGImages/{id_}.jpg" for id_ in self.ids]
         self.annotation_paths = [f"{voc_dir}/Annotations/{id_}.xml" for id_ in self.ids]
 
     def get_boxes_and_scores(self, i):
         # (box_count, 4)
         # dtype: float32
         # box format: (y_min, x_min, y_max, x_max)
-        boxes = self.ssw_boxes[i].astype(np.float32)
+        boxes = self.eb_boxes[i].astype(np.float32)
 
         # box format: (x_min, y_min, x_max, y_max)
         # this can be improved
         boxes = swap_axes(boxes)
+        mask = filter_small_boxes(boxes, 20)
 
         # (box_count, 1)
         # dtype: float64
-        scores = self.ssw_scores[i]
-        return boxes, scores
+        scores = self.eb_scores[i]
+        return boxes[mask], scores[mask]
 
     def get_target(self, gt_labels):
         target = np.full(20, 0, dtype=np.float32)
```
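For context, here is a minimal sketch of what the new `VocAndEb` loader expects from the EdgeBoxes `.mat` files. The keys (`boxes`, `boxScores`, `images`) come straight from the diff above; the shapes in the comments are assumptions about the MATLAB cell-array export, inferred from how `get_boxes_and_scores` indexes them:

```python
# Hypothetical inspection snippet for the EdgeBoxes proposal files.
from scipy.io import loadmat

mat = loadmat("../data/EdgeBoxesVOC2007trainval.mat")

boxes = mat["boxes"][0]       # object array: one (box_count, 4) array per image
scores = mat["boxScores"][0]  # object array: one (box_count, 1) array per image
ids = [str(id_[0]) for id_ in mat["images"][0]]  # image ids aligned with boxes

print(len(ids), boxes[0].shape, scores[0].shape)
```

Note that the image id list now comes from the proposal file itself rather than from `ImageSets/Main/{split}.txt`, which guarantees the proposals and images stay aligned.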

src/evaluate.py

Lines changed: 4 additions & 6 deletions

```diff
@@ -3,9 +3,9 @@
 import torch
 from torch.utils.data import DataLoader
 
-from datasets import VOCandSSW
+from datasets import VocAndEb
 from network import WSDDN
-from utils import evaluate, evaluate_detectron2
+from utils import evaluate
 
 SCALES = [480, 576, 688, 864, 1200]
 DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -21,8 +21,6 @@
 
 print("State is loaded")
 
-test_ds = VOCandSSW("test", SCALES)  # len = 4952
-test_dl = DataLoader(test_ds, batch_size=None, shuffle=False, num_workers=1)
-
-# evaluate_detectron2(net, test_dl)
+test_ds = VocAndEb("test", SCALES)  # len = 4952
+test_dl = DataLoader(test_ds, batch_size=None, shuffle=False, num_workers=4)
 evaluate(net, test_dl)
```
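A side note on `batch_size=None`, which the test loader relies on: it disables the DataLoader's automatic batching, so each item comes back exactly as the dataset produced it. A minimal sketch with a hypothetical stand-in dataset (`VariableBoxes` is not part of this repo):

```python
# Sketch: batch_size=None disables automatic batching, so the DataLoader
# yields each __getitem__ result as-is instead of collating a batch.
# That suits this repo, where every image has a different number of
# proposal boxes and default collation could not stack them.
import torch
from torch.utils.data import DataLoader, Dataset


class VariableBoxes(Dataset):  # hypothetical stand-in for VocAndEb
    def __len__(self):
        return 3

    def __getitem__(self, i):
        return torch.zeros(i + 1, 4)  # i + 1 boxes, 4 coordinates each


dl = DataLoader(VariableBoxes(), batch_size=None, shuffle=False)
for item in dl:
    print(item.shape)  # (1, 4), then (2, 4), then (3, 4)
```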

src/train.py

Lines changed: 11 additions & 21 deletions

```diff
@@ -7,8 +7,9 @@
 from torch import optim
 from torch.optim.lr_scheduler import MultiStepLR
 from torch.utils.data import DataLoader
+from tqdm import tqdm
 
-from datasets import VOCandSSW
+from datasets import VocAndEb
 from network import WSDDN
 from utils import evaluate
 
@@ -38,11 +39,11 @@
 SAVE_STATE_PER_EPOCH = 5
 
 # Create dataset and data loader
-train_ds = VOCandSSW("trainval", SCALES)  # len = 5011
-test_ds = VOCandSSW("test", SCALES)  # len = 4952
+train_ds = VocAndEb("trainval", SCALES)  # len = 5011
+test_ds = VocAndEb("test", SCALES)  # len = 4952
 
-train_dl = DataLoader(train_ds, batch_size=1, shuffle=True, num_workers=1)
-test_dl = DataLoader(test_ds, batch_size=None, shuffle=False, num_workers=1)
+train_dl = DataLoader(train_ds, batch_size=1, shuffle=True, num_workers=4)
+test_dl = DataLoader(test_ds, batch_size=None, shuffle=False, num_workers=4)
 
 # Create the network
 net = WSDDN()
@@ -60,16 +61,7 @@
 scheduler.last_epoch = OFFSET
 
 # Train the model
-for epoch in range(OFFSET + 1, EPOCHS + 1):
-
-    print(
-        "Epoch",
-        epoch,
-        "started at",
-        datetime.now(),
-        "with lr =",
-        scheduler.get_lr(),
-    )
+for epoch in tqdm(range(OFFSET + 1, EPOCHS + 1), "Total"):
 
     epoch_loss = 0.0
 
@@ -79,7 +71,7 @@
         batch_boxes,
         batch_scores,
         batch_target,
-    ) in train_dl:
+    ) in tqdm(train_dl, f"Epoch {epoch}"):
         optimizer.zero_grad()
 
         batch_imgs, batch_boxes, batch_scores, batch_target = (
@@ -99,14 +91,12 @@
     if epoch % SAVE_STATE_PER_EPOCH == 0:
         path = f"../states/epoch_{epoch}.pt"
         torch.save(net.state_dict(), path)
-        print("State saved to", path)
+        tqdm.write(f"State saved to {path}")
 
-    print("Avg loss is", epoch_loss / len(train_ds))
+    tqdm.write(f"Avg loss is {epoch_loss / len(train_ds)}")
 
     if epoch % EVAL_PER_EPOCH == 0:
-        print("Evaluation started at", datetime.now())
+        tqdm.write(f"Evaluation started at {datetime.now()}")
         evaluate(net, test_dl)
 
-    print("Epoch", epoch, "completed at", datetime.now(), "\n")
-
     scheduler.step()
```
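The switch from `print` to `tqdm.write` is the standard pattern for logging around live progress bars: `tqdm.write` clears the bars, prints the line, and redraws them, while a bare `print` would leave half-drawn bars scattered through the log. A small self-contained illustration:

```python
# Illustration of tqdm + tqdm.write: messages appear on their own lines
# while the nested progress bars keep updating in place.
import time

from tqdm import tqdm

for epoch in tqdm(range(1, 3), "Total"):
    for _ in tqdm(range(50), f"Epoch {epoch}"):
        time.sleep(0.01)
    tqdm.write(f"Epoch {epoch} finished")  # does not corrupt the bars
```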

src/utils.py

Lines changed: 15 additions & 101 deletions

```diff
@@ -1,6 +1,5 @@
 import logging
 import random
-import tqdm
 from collections import defaultdict
 from datetime import datetime
 
@@ -13,8 +12,7 @@
 from PIL import Image
 from torchvision import transforms
 from torchvision.ops import nms
-
-from detectron2.evaluation import PascalVOCDetectionEvaluator
+from tqdm import tqdm
 
 # this is duplicate
 DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -56,95 +54,6 @@ def prepare(img, boxes, max_dim=None, xflip=False, gt_boxes=None, gt_labels=None
     return img, boxes, gt_boxes
 
 
-def evaluate_detectron2(net, dataloader):
-    CLASSES = [
-        "aeroplane",
-        "bicycle",
-        "bird",
-        "boat",
-        "bottle",
-        "bus",
-        "car",
-        "cat",
-        "chair",
-        "cow",
-        "diningtable",
-        "dog",
-        "horse",
-        "motorbike",
-        "person",
-        "pottedplant",
-        "sheep",
-        "sofa",
-        "train",
-        "tvmonitor",
-    ]
-
-    class Detectron2VOCEvaluator(PascalVOCDetectionEvaluator):
-        def __init__(self):
-            self._dataset_name = "voc_2007_test"
-            self._anno_file_template = (
-                "/ws/data/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/Annotations/{}.xml"
-            )
-            self._image_set_path = (
-                "/ws/data/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/test.txt"
-            )
-            self._class_names = CLASSES
-            self._is_2007 = True
-            self._cpu_device = torch.device("cpu")
-            self._logger = logging.getLogger(__name__)
-            self._predictions = defaultdict(list)
-
-    evaluator = Detectron2VOCEvaluator()
-
-    print("Evaluation started at", datetime.now())
-
-    with torch.no_grad():
-
-        net.eval()
-
-        # check img_id -> batch or single
-
-        for (img_id, img, boxes, scores, gt_boxes, gt_labels) in dataloader:
-            boxes, scores, gt_boxes, gt_labels = (
-                boxes.numpy(),
-                scores.numpy(),
-                gt_boxes.numpy(),
-                gt_labels.numpy(),
-            )
-
-            batch_imgs, batch_boxes, batch_scores = (
-                np2gpu(img, DEVICE),
-                np2gpu(boxes, DEVICE),
-                np2gpu(scores, DEVICE),
-            )
-
-            combined_scores, pred_boxes = net(batch_imgs, batch_boxes, batch_scores)
-
-            for i in range(20):
-                region_scores = combined_scores[:, i]
-
-                selected_indices = nms(pred_boxes, region_scores, 0.4)
-
-                resulting_boxes = pred_boxes[selected_indices].cpu().numpy()[:300]
-                resulting_scores = region_scores[selected_indices].cpu().numpy()[:300]
-                resulting_scores *= np.squeeze(scores[: len(resulting_scores)])
-
-                for j, resulting_box in enumerate(resulting_boxes):
-                    evaluator._predictions[i].append(
-                        f"{img_id} {resulting_scores[j]:.3f} {resulting_box[0] + 1:.1f} {resulting_box[1] + 1:.1f} {resulting_box[2]:.1f} {resulting_box[3]:.1f}"
-                    )
-
-    print("Predictions completed at", datetime.now())
-
-    net.train()
-
-    result = evaluator.evaluate()
-
-    print("Evaluation completed at", datetime.now())
-    print(result)
-
-
 def evaluate(net, dataloader):
     """Evaluates network."""
     with torch.no_grad():
@@ -156,7 +65,9 @@ def evaluate(net, dataloader):
         total_gt_boxes = []
        total_gt_labels = []
 
-        for (img_id, img, boxes, scores, gt_boxes, gt_labels) in tqdm.tqdm(dataloader, "Evaluating..."):
+        for (img_id, img, boxes, scores, gt_boxes, gt_labels) in tqdm(
+            dataloader, "Evaluation"
+        ):
             boxes, scores, gt_boxes, gt_labels = (
                 boxes.numpy(),
                 scores.numpy(),
@@ -172,6 +83,7 @@ def evaluate(net, dataloader):
                 np2gpu(gt_labels, DEVICE),
             )
 
+            # why batch_boxes is not used and pred_boxes is returned
             combined_scores, pred_boxes = net(batch_imgs, batch_boxes, batch_scores)
 
             batch_pred_boxes = []
@@ -180,13 +92,15 @@
 
             for i in range(20):
                 region_scores = combined_scores[:, i]
-                selected_indices = nms(pred_boxes, region_scores, 0.4)
+                score_mask = region_scores > 1e-3
+
+                selected_scores = region_scores[score_mask]
+                selected_boxes = pred_boxes[score_mask]
+                nms_mask = nms(selected_boxes, selected_scores, 0.4)
 
-                batch_pred_boxes.append(pred_boxes[selected_indices].cpu().numpy())
-                batch_pred_scores.append(region_scores[selected_indices].cpu().numpy())
-                batch_pred_labels.append(
-                    np.full(len(selected_indices), i, dtype=np.int32)
-                )
+                batch_pred_boxes.append(selected_boxes[nms_mask].cpu().numpy())
+                batch_pred_scores.append(selected_scores[nms_mask].cpu().numpy())
+                batch_pred_labels.append(np.full(len(nms_mask), i, dtype=np.int32))
 
             total_pred_boxes.append(np.concatenate(batch_pred_boxes, axis=0))
             total_pred_scores.append(np.concatenate(batch_pred_scores, axis=0))
@@ -222,8 +136,8 @@ def filter_small_boxes(boxes, min_size):
     """Filters out small boxes."""
     w = boxes[:, 2] - boxes[:, 0]
     h = boxes[:, 3] - boxes[:, 1]
-    keep = np.where((w >= min_size) & (h > min_size))[0]
-    return keep
+    mask = (w >= min_size) & (h >= min_size)
+    return mask
 
 
 def hflip(img, boxes, gt_boxes=None):
```
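Two details of the new filtering code are easy to miss: `filter_small_boxes` now returns a boolean mask rather than `np.where` indices (and the height check becomes `>=` to match the width check), while `torchvision.ops.nms` returns the indices of kept boxes sorted by score, so `nms_mask` is an index tensor despite its name and `len(nms_mask)` counts the surviving boxes. A minimal sketch of the pipeline with made-up values:

```python
# Minimal sketch of the new evaluate() filtering: threshold scores with a
# boolean mask, then run NMS on the survivors. Box/score values are made up.
import torch
from torchvision.ops import nms

boxes = torch.tensor(
    [[0.0, 0.0, 50.0, 50.0], [2.0, 2.0, 52.0, 52.0], [0.0, 0.0, 5.0, 5.0]]
)
scores = torch.tensor([0.9, 0.8, 1e-4])

score_mask = scores > 1e-3                              # boolean mask drops the weak box
kept = nms(boxes[score_mask], scores[score_mask], 0.4)  # kept indices, not a mask
print(boxes[score_mask][kept])                          # only the top overlapping box survives

w = boxes[:, 2] - boxes[:, 0]
h = boxes[:, 3] - boxes[:, 1]
print((w >= 20) & (h >= 20))                            # mask form of filter_small_boxes(boxes, 20)
```

Pre-filtering by score before NMS also shrinks the pairwise-IoU workload, which is presumably why the threshold was added alongside the mask change.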
