diff --git a/.vscode/launch.json b/.vscode/launch.json index 2dfd591..614e7a3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -27,6 +27,7 @@ "justMyCode": false, "args": [ "datasets", + "--models-path", "models" ] }, ] diff --git a/README.md b/README.md index 56f3817..27e350c 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ Small Python utility to **compare and visualize** the output of various **stereo - [STereo TRansformers](https://github.com/mli0603/stereo-transformer): "Revisiting Stereo Depth Estimation From a Sequence-to-Sequence Perspective with Transformers" (ICCV 2021) - [Chang et al. RealtimeStereo](https://github.com/JiaRenChang/RealtimeStereo): "Attention-Aware Feature Aggregation for Real-time Stereo Matching on Edge Devices" (ACCV 2020) +- [DistDepth](https://github.com/facebookresearch/DistDepth): "Toward Practical Monocular Indoor Depth Estimation" (CVPR 2022). This one is actually a **monocular** method, only using the left image. + See below for more details / credits to get each of these working, and check this [blog post for more results, including performance numbers](https://nicolas.burrus.name/stereo-comparison/). https://user-images.githubusercontent.com/541507/169557430-48e62510-60c2-4a2b-8747-f9606e405f74.mp4 @@ -110,6 +112,10 @@ I did not implement any of these myself, but just collected pre-trained models o - Official implementation and pre-trained models: https://github.com/JiaRenChang/RealtimeStereo - I exported the pytorch implementation to torch script via tracing with some minor changes to the code https://github.com/JiaRenChang/RealtimeStereo/pull/15 . See [chang_realtimestereo_to_torchscript_onnx.py](tools/chang_realtimestereo_to_torchscript_onnx.py). 
+- DistDepth + - Official implementation and pre-trained models https://github.com/facebookresearch/DistDepth + - I exported the pytorch implementation to torch script via tracing, see [the changes](https://github.com/facebookresearch/DistDepth/commit/fde3b427ef2ff31c34f08e99c51c8e6a2427b720). + # License The code of stereodemo is MIT licensed, but the pre-trained models are subject to the license of their respective implementation. diff --git a/stereodemo/main.py b/stereodemo/main.py index 8c8720e..aec851e 100644 --- a/stereodemo/main.py +++ b/stereodemo/main.py @@ -11,6 +11,7 @@ import cv2 + from . import visualizer from . import methods @@ -20,6 +21,7 @@ from .method_chang_realtime_stereo import ChangRealtimeStereo from .method_hitnet import HitnetStereo from .method_sttr import StereoTransformers +from stereodemo.method_dist_depth import DistDepth def parse_args(): import argparse @@ -164,6 +166,7 @@ def main(): HitnetStereo(config), StereoTransformers(config), ChangRealtimeStereo(config), + DistDepth(config) ] if args.images: diff --git a/stereodemo/method_dist_depth.py b/stereodemo/method_dist_depth.py new file mode 100644 index 0000000..5e655ac --- /dev/null +++ b/stereodemo/method_dist_depth.py @@ -0,0 +1,92 @@ +from pathlib import Path +import shutil +import time +from dataclasses import dataclass +import urllib.request +import tempfile +import sys + +import torch +from torchvision import transforms + +import cv2 +import numpy as np + +from .methods import Calibration, Config, EnumParameter, StereoMethod, InputPair, StereoOutput +from . import utils + +urls = { + "dist-depth-256x256.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-distdepth/dist-depth-256x256.scripted.pt", +} + +# https://github.com/facebookresearch/DistDepth +# Exported via torch tracing by tweaking the original demo.py. 
# Changes here: https://github.com/nburrus/DistDepth/commit/fde3b427ef2ff31c34f08e99c51c8e6a2427b720
class DistDepth(StereoMethod):
    """Monocular DistDepth model exposed through the stereo method interface.

    Only the left image of the input pair is fed to the network. The network
    output is treated as a depth map in meters and converted to a disparity
    map so it can be shown next to the real stereo methods.
    """

    def __init__(self, config: Config):
        super().__init__("[Monocular] DistDepth (CVPR 2022)",
                         "Toward Practical Monocular Indoor Depth Estimation.",
                         {},
                         config)
        self.reset_defaults()

        # TorchScript module and the path it was loaded from; both are filled
        # in lazily by _load_model on the first compute_disparity call.
        self.net = None
        self._loaded_model_path = None

    def reset_defaults(self):
        """Restore the tunable parameters to their default values."""
        self.parameters.update({
            # "Device": EnumParameter("Device", 0, ["CPU", "CUDA"]),
            # For some reason it crashes with CUDA on my machine, disabling for now.
            "Device": EnumParameter("Device", 0, ["CPU"]),
        })

    def compute_disparity(self, input: InputPair) -> StereoOutput:
        """Run the network on the left image and return a full-resolution disparity map.

        The model file is downloaded into ``config.models_path`` on first use.
        The reported time only covers the network forward pass.
        """
        # The pre-trained model is for 256x256. Their demo script resizes
        # all input images to that.
        self.target_size = (256, 256)
        use_cuda = self.parameters["Device"].value == 'CUDA'
        device = torch.device('cuda' if use_cuda else 'cpu')

        model_path = self.config.models_path / 'dist-depth-256x256.scripted.pt'
        self._load_model(model_path)

        # HWC -> CHW. raw_img can stay in BGR.
        raw_img = np.transpose(input.left_image, (2, 0, 1))
        input_image = torch.from_numpy(raw_img).float().to(device)
        # Scale to [0, 1] and add the batch dimension expected by interpolate.
        input_image = (input_image / 255.0).unsqueeze(0)
        input_image = torch.nn.functional.interpolate(
            input_image, self.target_size, mode="bilinear", align_corners=False
        )

        net = self.net.to(device)

        # perf_counter is monotonic, so the duration cannot be skewed by
        # wall-clock adjustments.
        start = time.perf_counter()
        with torch.no_grad():
            outputs = net(input_image)
        elapsed_time = time.perf_counter() - start

        disparity_map = self._process_output(outputs, input.calibration)
        full_height, full_width = input.left_image.shape[:2]
        if disparity_map.shape[:2] != (full_height, full_width):
            # The interpolation flag must be passed by keyword: the third
            # positional parameter of cv2.resize is `dst`, so a positional flag
            # is not interpreted as the interpolation mode.
            disparity_map = cv2.resize(disparity_map,
                                       (full_width, full_height),
                                       interpolation=cv2.INTER_NEAREST)
            # no need to scale, the disparity values were already for the input full resolution calibration.

        return StereoOutput(disparity_map, input.left_image, elapsed_time)

    def _process_output(self, outputs, calib: Calibration):
        """Turn the raw network output into a disparity map for ``calib``."""
        # Take the first element of the output and drop its leading axis.
        depth_meters = outputs[0].detach().squeeze(0).cpu().numpy()
        # The model directly gives a depth map in meters. Let's convert it
        # to disparity to fit in the stereo display.
        return StereoMethod.disparity_from_depth_meters(depth_meters, calib)

    def _load_model(self, model_path: Path):
        """Load the TorchScript model at ``model_path``, downloading it if missing.

        Does nothing when that same path is already loaded.

        Raises:
            FileNotFoundError: the file is still missing after the download attempt.
        """
        if self._loaded_model_path == model_path:
            return

        if not model_path.exists():
            utils.download_model(urls[model_path.name], model_path)

        # Explicit check instead of an assert so it survives `python -O`.
        if not model_path.exists():
            raise FileNotFoundError(f"DistDepth model file not found: {model_path}")

        net = torch.jit.load(model_path)
        net.cpu()
        net.eval()

        # Commit the state only after a successful load. Otherwise a failed
        # load would be remembered as done and the next call would run with
        # self.net unset.
        self.net = net
        self._loaded_model_path = model_path
# NOTE: this function belongs inside the StereoMethod class body; it is
# invoked as StereoMethod.disparity_from_depth_meters(depth_meters, calibration).
def disparity_from_depth_meters(depth_meters: np.ndarray, calibration: Calibration):
    """Convert a depth map in meters into a disparity map in pixels.

    Computes ``baseline_meters * fx / depth_meters + (cx0 - cx1)`` element-wise.

    Args:
        depth_meters: array of depth values.
        calibration: object providing ``fx``, ``baseline_meters``, ``cx0`` and ``cx1``.

    Returns:
        Array of disparities with the same shape as ``depth_meters``. A zero
        depth divides by zero; the resulting infinity is replaced by a large
        finite value by ``np.nan_to_num``.
    """
    dcx = np.float32(calibration.cx0 - calibration.cx1)
    # errstate restores numpy's global error settings even if the division
    # raises, unlike a manual seterr save/restore pair.
    with np.errstate(divide='ignore'):
        disparity_pixels = (np.float32(calibration.baseline_meters * calibration.fx) / depth_meters) + dcx
    return np.nan_to_num(disparity_pixels)