Add DistDepth (monocular, CVPR 2022).

nburrus · Jun 20, 2022 · 0463139 · 0463139
1 parent 7807032
commit 0463139
Show file tree

Hide file tree

Showing 6 changed files with 111 additions and 1 deletion.
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -27,6 +27,7 @@
             "justMyCode": false,
             "args": [
                 "datasets",
+                "--models-path", "models"
             ]
         },
     ]

diff --git a/README.md b/README.md
@@ -21,6 +21,8 @@ Small Python utility to **compare and visualize** the output of various **stereo
 - [STereo TRansformers](https://github.com/mli0603/stereo-transformer): "Revisiting Stereo Depth Estimation From a Sequence-to-Sequence Perspective with Transformers" (ICCV 2021)
 - [Chang et al. RealtimeStereo](https://github.com/JiaRenChang/RealtimeStereo): "Attention-Aware Feature Aggregation for Real-time Stereo Matching on Edge Devices" (ACCV 2020)
 
+- [DistDepth](https://github.com/facebookresearch/DistDepth): "Toward Practical Monocular Indoor Depth Estimation" (CVPR 2022). This one is actually a **monocular** method, only using the left image.
+
 See below for more details / credits to get each of these working, and check this [blog post for more results, including performance numbers](https://nicolas.burrus.name/stereo-comparison/).
 
 https://user-images.githubusercontent.com/541507/169557430-48e62510-60c2-4a2b-8747-f9606e405f74.mp4
@@ -110,6 +112,10 @@ I did not implement any of these myself, but just collected pre-trained models o
   - Official implementation and pre-trained models: https://github.com/JiaRenChang/RealtimeStereo
   - I exported the pytorch implementation to torch script via tracing with some minor changes to the code https://github.com/JiaRenChang/RealtimeStereo/pull/15 . See [chang_realtimestereo_to_torchscript_onnx.py](tools/chang_realtimestereo_to_torchscript_onnx.py).
 
+- DistDepth
+  - Official implementation and pre-trained models: https://github.com/facebookresearch/DistDepth
+  - I exported the pytorch implementation to torch script via tracing, see [the changes](https://github.com/facebookresearch/DistDepth/commit/fde3b427ef2ff31c34f08e99c51c8e6a2427b720).
+
 # License
 
 The code of stereodemo is MIT licensed, but the pre-trained models are subject to the license of their respective implementation.

diff --git a/stereodemo/main.py b/stereodemo/main.py
@@ -11,6 +11,7 @@
 
 import cv2
 
+
 from . import visualizer
 from . import methods
 
@@ -20,6 +21,7 @@
 from .method_chang_realtime_stereo import ChangRealtimeStereo
 from .method_hitnet import HitnetStereo
 from .method_sttr import StereoTransformers
+from stereodemo.method_dist_depth import DistDepth
 
 def parse_args():
     import argparse
@@ -164,6 +166,7 @@ def main():
         HitnetStereo(config),
         StereoTransformers(config),
         ChangRealtimeStereo(config),
+        DistDepth(config)
     ]
 
     if args.images:

diff --git a/stereodemo/method_dist_depth.py b/stereodemo/method_dist_depth.py
@@ -0,0 +1,92 @@
+from pathlib import Path
+import shutil
+import time
+from dataclasses import dataclass
+import urllib.request
+import tempfile
+import sys
+
+import torch
+from torchvision import transforms
+
+import cv2
+import numpy as np
+
+from .methods import Calibration, Config, EnumParameter, StereoMethod, InputPair, StereoOutput
+from . import utils
+
+urls = {
+    "dist-depth-256x256.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-distdepth/dist-depth-256x256.scripted.pt",
+}
+
+# https://github.com/facebookresearch/DistDepth
+# Exported via torch tracing by tweaking the original demo.py.
+# Changes here: https://github.com/nburrus/DistDepth/commit/fde3b427ef2ff31c34f08e99c51c8e6a2427b720
+class DistDepth(StereoMethod):
+    """Monocular depth via the traced DistDepth model; only the left image is used."""
+
+    def __init__(self, config: Config):
+        super().__init__("[Monocular] DistDepth (CVPR 2022)",
+                         "Toward Practical Monocular Indoor Depth Estimation.",
+                         {},
+                         config)
+        self.reset_defaults()
+
+        self.net = None
+        self._loaded_model_path = None
+
+    def reset_defaults(self):
+        """Restore the default parameters (CPU is the only device offered)."""
+        self.parameters.update ({
+            # CUDA crashed on the author's machine, so it is not offered for now.
+            "Device": EnumParameter("Device", 0, ["CPU"]),
+        })
+
+    def compute_disparity(self, input: InputPair) -> StereoOutput:
+        """Run the network on input.left_image and return a full-resolution disparity map."""
+        # The pre-trained model is for 256x256. Their demo script resizes
+        # all input images to that.
+        self.target_size = (256, 256)
+        device = torch.device('cuda' if self.parameters["Device"].value == 'CUDA' else 'cpu')
+
+        model_path = self.config.models_path / 'dist-depth-256x256.scripted.pt'
+        self._load_model (model_path)
+
+        # raw_img can stay in BGR. Move channels first, divide by 255, add a batch dimension.
+        raw_img = np.transpose(input.left_image, (2, 0, 1))
+        input_image = torch.from_numpy(raw_img).float().to(device)
+        input_image = (input_image / 255.0).unsqueeze(0)
+        input_image = torch.nn.functional.interpolate(
+            input_image, self.target_size, mode="bilinear", align_corners=False
+        )
+
+        net = self.net.to(device)
+
+        start = time.time()
+        with torch.no_grad():
+            outputs = net(input_image)
+        elapsed_time = time.time() - start
+
+        disparity_map = self._process_output(outputs, input.calibration)
+        full_h, full_w = input.left_image.shape[:2]
+        if disparity_map.shape[:2] != (full_h, full_w):
+            # interpolation must be a keyword: the third positional argument of cv2.resize is dst.
+            # No need to scale the values, they were already computed with the full resolution calibration.
+            disparity_map = cv2.resize (disparity_map, (full_w, full_h), interpolation=cv2.INTER_NEAREST)
+
+        return StereoOutput(disparity_map, input.left_image, elapsed_time)
+
+    def _process_output(self, outputs, calib: Calibration):
+        """Convert the network output (depth in meters) to a disparity map."""
+        depth_meters = outputs[0].detach().squeeze(0).cpu().numpy()
+        # The model directly gives a depth map in meters. Let's convert it
+        # to disparity to fit in the stereo display.
+        return StereoMethod.disparity_from_depth_meters(depth_meters, calib)
+
+    def _load_model(self, model_path: Path):
+        """Load the TorchScript model at model_path, downloading it first if missing."""
+        if self._loaded_model_path == model_path:
+            return
+
+        if not model_path.exists():
+            utils.download_model (urls[model_path.name], model_path)
+        if not model_path.exists():
+            raise FileNotFoundError(f"Could not find or download {model_path}")
+
+        # Record the path only after a successful load so that a failed load is retried.
+        self.net = torch.jit.load(model_path)
+        self.net.cpu ()
+        self.net.eval ()
+        self._loaded_model_path = model_path
diff --git a/stereodemo/methods.py b/stereodemo/methods.py
@@ -118,3 +118,11 @@ def depth_meters_from_disparity(disparity_pixels: np.ndarray, calibration: Calib
         depth_meters[disparity_pixels < 0.] = -1.0
         np.seterr(**old_seterr)
         return depth_meters
+
+    @staticmethod
+    def disparity_from_depth_meters(depth_meters: np.ndarray, calibration: Calibration):
+        """Convert depth (meters) to disparity (pixels): baseline_meters * fx / depth + (cx0 - cx1)."""
+        dcx = np.float32(calibration.cx0 - calibration.cx1)
+        with np.errstate(divide='ignore'):  # division by a zero depth is sanitized by nan_to_num below
+            disparity_pixels = (np.float32(calibration.baseline_meters * calibration.fx) / depth_meters) + dcx
+        return np.nan_to_num(disparity_pixels)
diff --git a/stereodemo/visualizer.py b/stereodemo/visualizer.py
@@ -145,7 +145,7 @@ def __init__(self, stereo_methods: Dict[str, StereoMethod], source: Source):
         self.algo_list = gui.ListView()
         self.algo_list.set_items(list(stereo_methods.keys()))
         self.algo_list.selected_index = 0
-        self.algo_list.set_max_visible_items(7)
+        self.algo_list.set_max_visible_items(8)
         self.algo_list.set_on_selection_changed(self._on_algo_list_selected)
         self._settings_panel.add_child(self.algo_list)