Commit
v060 release (#10)
danielS91 authored Sep 26, 2023
1 parent e666d2b commit 90344ec
Showing 20 changed files with 921 additions and 534 deletions.
16 changes: 16 additions & 0 deletions .vscode/launch.json
@@ -48,5 +48,21 @@
"--force-at-least-n-classes-in-view", "4",
],
},
{
"name": "create sunrgbd v060",
"type": "python",
"request": "launch",
"module": "nicr_scene_analysis_datasets.datasets.sunrgbd.prepare_dataset",
"console": "integratedTerminal",
"args": [
"/local/datasets/sunrgbd_test",
"--toolbox-filepath", "/local/datasets/raw/sunrgbd/SUNRGBDtoolbox.zip",
"--data-filepath", "/local/datasets/raw/sunrgbd/SUNRGBD.zip",
"--box-filepath", "/local/datasets/raw/sunrgbd/SUNRGBDMeta3DBB_v2.mat",
"--create-instances",
"--copy-instances-from-nyuv2",
"--nyuv2-path", "/datasets_nas/nicr_scene_analysis_datasets/version_060/nyuv2"
],
},
]
}
47 changes: 39 additions & 8 deletions README.md
@@ -8,13 +8,13 @@ Currently, this package features the following datasets and annotations:

| Dataset | Updated/Tested | Type | Semantic | Instance | Orientations | Scene | Normal | 3D Boxes | Extrinsics | Intrinsics |
|:----------------------------------------------------------------------|:--------------:|:---------:|:--------:|:--------:|:--------------:|:--------:|:------------------:|:--------:|:----------:|:----------:|
-| [COCO](https://cocodataset.org/#home) | v030/v040 | RGB | ✓ | ✓ | | | | | | |
-| [Cityscapes](https://www.cityscapes-dataset.com/) | v050/v050 | RGB-D\* | ✓ | ✓ | | | | | | |
-| [Hypersim](https://machinelearning.apple.com/research/hypersim) | v052/v052 | RGB-D | ✓ | ✓ | (✓)\*\* | ✓ | (✓)\*\*\*\* | ✓ | ✓ | ✓ |
-| [NYUv2](https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html) | v030/v040 | RGB-D | ✓ | ✓ | ✓\*\*\* | ✓ | ✓ | | | |
-| [ScanNet](http://www.scan-net.org/) | v051/v051 | RGB-D | ✓ | ✓ | | ✓ | | | ✓ | ✓ |
-| [SceneNet RGB-D](https://robotvault.bitbucket.io/scenenet-rgbd.html) | v054/v054 | RGB-D | ✓ | ✓ | | ✓ | | | | |
-| [SUNRGB-D](https://rgbd.cs.princeton.edu/) | v030/v040 | RGB-D | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | ✓ |
+| [COCO](https://cocodataset.org/#home) | v030/v060 | RGB | ✓ | ✓ | | | | | | |
+| [Cityscapes](https://www.cityscapes-dataset.com/) | v050/v060 | RGB-D\* | ✓ | ✓ | | | | | | |
+| [Hypersim](https://machinelearning.apple.com/research/hypersim) | v052/v060 | RGB-D | ✓ | ✓ | (✓)\*\* | ✓ | (✓)\*\*\*\* | ✓ | ✓ | ✓ |
+| [NYUv2](https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html) | v030/v060 | RGB-D | ✓ | ✓ | ✓\*\*\* | ✓ | ✓ | | | |
+| [ScanNet](http://www.scan-net.org/) | v051/v060 | RGB-D | ✓ | ✓ | | ✓ | | | ✓ | ✓ |
+| [SceneNet RGB-D](https://robotvault.bitbucket.io/scenenet-rgbd.html) | v054/v060 | RGB-D | ✓ | ✓ | | ✓ | | | | |
+| [SUNRGB-D](https://rgbd.cs.princeton.edu/) | v060/v060 | RGB-D | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | ✓ |

\* Both depth and disparity are available.
\*\* Orientations are available but not consistent for instances within a semantic class (see Hypersim).
@@ -131,6 +131,7 @@ python -m pip install . [--user]
Please follow the instructions given in the respective dataset folder to prepare the datasets.

- [Cityscapes](nicr_scene_analysis_datasets/datasets/cityscapes)
- [COCO](nicr_scene_analysis_datasets/datasets/coco)
- [Hypersim](nicr_scene_analysis_datasets/datasets/hypersim)
- [NYUv2](nicr_scene_analysis_datasets/datasets/nyuv2)
- [ScanNet](nicr_scene_analysis_datasets/datasets/scannet)
@@ -148,7 +149,7 @@ We provide several command-line entry points for common tasks:
- `nicr_sa_semantic_instance_viewer`: viewer for semantic and instance (and panoptic) annotations
- `nicr_sa_labeled_pc_viewer`: viewer for labeled point clouds

-### How to use the dataset in our pipeline
+### How to use a dataset in a pipeline
In the following, an example for Hypersim is given.

First, specify the dataset path:
@@ -253,6 +254,36 @@ For further details, we refer to the usage in our [EMSANet repository](https://g
The dataset can be used as an iterator (as detectron2 usually does) and then mapped with custom mappers to generate the correct layout of the data.
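A minimal sketch of this iterate-then-map pattern (the sample keys and mapper logic below are illustrative placeholders, not the package's actual schema or API):

```python
def custom_mapper(sample):
    # Convert a raw sample dict into the layout a downstream pipeline
    # expects; the keys here are hypothetical, not the real dataset schema.
    return {
        'image': sample['rgb'],
        'annotations': sample.get('instance'),
    }


class MappedDataset:
    """Wrap an iterable dataset and apply a mapper to every sample."""

    def __init__(self, dataset, mapper):
        self._dataset = dataset
        self._mapper = mapper

    def __iter__(self):
        for sample in self._dataset:
            yield self._mapper(sample)


# usage with a stand-in dataset
raw_samples = [{'rgb': 'img0', 'instance': 'inst0'}, {'rgb': 'img1'}]
mapped = list(MappedDataset(raw_samples, custom_mapper))
```

Lazy mapping keeps memory usage flat: samples are transformed one at a time as the pipeline iterates, rather than materializing the whole mapped dataset up front.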

## Changelog

**Version 0.6.0 (Sep 26, 2023)**
- SUNRGB-D:
- refactor and update instance creation from 3D boxes: annotations for
instances, boxes, and (instance) orientations have changed:
- ignore semantic stuff classes and void while matching boxes and point
clouds
- enhanced matching for similar classes (e.g., table <-> desk)
- resulting annotations feature many more instances
- if you use the new instance annotations, please refer to this version of
the dataset as *SUNRGB-D (PanopticNDT version)* and to previous versions
with instance information as *SUNRGB-D (EMSANet version)*
- **note, version 0.6.0 is NOT compatible with previous versions, you will
get deviating results when applying EMSANet or EMSAFormer**
- Hypersim:
- add more notes/comments for blacklisted scenes/camera trajectories
- do not use orientations by default (annotations provided by the dataset are
not consistent for instances within a semantic class), i.e., return an
empty OrientationDict for all samples unless `orientations_use` is enabled
- `nicr_sa_labeled_pc_viewer`: add `--max-z-value` argument to limit the
maximum z-value for the point cloud viewer
- `nicr_sa_depth_viewer`: add `image_nonzero` mode for scaling depth values
(`--mode` argument)
- MIRA readers:
- add instance meta stuff
- terminate MIRA in a softer way (do not send SIGKILL, send SIGINT instead
and wait before sending again) to force proper termination (and profile
creation)
- some test fixes
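The softer MIRA termination described above follows a common pattern: ask the child process to exit via SIGINT so it can run its shutdown hooks, wait, and only escalate if it does not comply. A POSIX-only sketch (the timeout value and the `sleep` child are placeholders, not the actual MIRA reader code):

```python
import signal
import subprocess


def terminate_softly(proc, timeout=5.0):
    # Ask the process to exit cleanly first (SIGINT), so it can run its
    # shutdown code (e.g., write profiles); only kill if it hangs.
    proc.send_signal(signal.SIGINT)
    try:
        proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        # Process ignored SIGINT; force termination as a last resort.
        proc.kill()
        proc.wait()


# demo with a stand-in long-running child process
proc = subprocess.Popen(['sleep', '30'])
terminate_softly(proc)
```

Sending SIGKILL immediately would deny the child any chance to flush state, which is why the escalation only happens after the grace period expires.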

**Version 0.5.6 (Sep 26, 2023)**
- `ConcatDataset`:
- add `datasets` property to get the list of currently active datasets
2 changes: 1 addition & 1 deletion nicr_scene_analysis_datasets/dataset_base/_base_dataset.py
@@ -181,7 +181,7 @@ def identifier2idx(
# call it too much
idx = -1
for i in range(len(self)):
-if self._load_identifier(i) == identifier:
+if self._load_identifier(i) == tuple(identifier):
if not ensure_single_match:
# first match is enough
return i
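The `tuple(identifier)` conversion in this hunk matters because callers may pass the identifier as a list, and in Python a list never compares equal to a tuple, even with identical elements:

```python
# Stored identifiers are tuples; a caller might pass a list instead.
stored = ('scene_01', 'cam_00', '0001')  # hypothetical identifier
queried = ['scene_01', 'cam_00', '0001']

# list == tuple is always False, even for the same elements ...
assert queried != stored

# ... so the lookup normalizes the query with tuple(...) first
assert tuple(queried) == stored
```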
@@ -43,7 +43,7 @@ def print_(*args, **kwargs):
)
weight_mode = 'none'

-print_(f"Computing class weights for '{sample_key}' ...")
+print_(f"Computing '{weight_mode}' class weights for '{sample_key}' ...")

if 'none' == weight_mode:
# equal weights for all classes -> disables class weighting
9 changes: 9 additions & 0 deletions nicr_scene_analysis_datasets/datasets/hypersim/dataset.py
@@ -34,6 +34,7 @@ def __init__(
use_cache: bool = False,
cameras: Optional[Tuple[str]] = None,
depth_mode: str = 'raw',
orientations_use: bool = False,
scene_use_indoor_domestic_labels: bool = False,
**kwargs: Any
) -> None:
@@ -53,6 +54,7 @@ def __init__(

self._split = split
self._depth_mode = depth_mode
self._orientations_use = orientations_use
self._scene_use_indoor_domestic_labels = scene_use_indoor_domestic_labels

# cameras
@@ -226,6 +228,13 @@ def _load_instance(self, idx: int) -> np.ndarray:
return instance.astype('int32')

def _load_orientations(self, idx: int) -> Dict[int, float]:
# be aware that provided dataset orientations might not be consistent
# for instances within the same semantic class, thus, we disable
# instances by default
if not self._orientations_use:
return OrientationDict({})

# use orientations provided by the dataset
fp = os.path.join(self._dataset_path,
self.split,
self.ORIENTATIONS_DIR,
70 changes: 54 additions & 16 deletions nicr_scene_analysis_datasets/datasets/hypersim/hypersim.py
@@ -226,22 +226,60 @@ def get_split_filelist_filenames(cls, subsample=None):

# exclude scenes/camera trajectories that are probably detrimental to
# training used in prepare_dataset
# note that we process all scenes in prepare_dataset; the scenes/camera
# trajectories listed below are only excluded from the split files, so
# you can still go through them to have a look
BLACKLIST = {
-'ai_003_001': 'cam_00', # train
-'ai_004_009': 'cam_01', # train
-'ai_013_001': '*', # train (no depth)
-'ai_015_006': '*', # train
-'ai_023_008': 'cam_01', # train (no labels)
-'ai_026_020': '*', # train (no labels)
-'ai_023_009': '*', # train (no labels)
-'ai_038_007': '*', # train
-'ai_023_004': 'cam_01', # train (no labels)
-'ai_023_006': '*', # train (no labels)
-'ai_026_013': '*', # train (no labels)
-'ai_026_018': '*', # train (no labels)
-# 'ai_044_004': 'cam_01', # test (no labels)
-'ai_046_001': '*', # train
-# 'ai_046_009': '*', # already excluded in csv
-'ai_048_004': '*', # train
-'ai_053_005': '*' # val
+# train (no rgb)
+'ai_003_001': 'cam_00',
+
+# train (no rgb)
+'ai_004_009': 'cam_01',
+
+# train ('other' -> some outdoor stuff with large columns, no useful
+# rgb, depth, or labels)
+'ai_013_001': '*',
+
+# train (missing textures in rgb)
+'ai_015_006': '*',
+
+# train ('hall', no useful labels: only ceiling, cam_00 is also bad)
+'ai_023_008': 'cam_01',
+
+# train ('hallway', no useful labels: only wall/floor)
+'ai_026_020': '*',
+
+# train ('hall', no useful trajectories, no useful labels: only wall)
+'ai_023_009': '*',
+
+# train ('transit station', missing textures in rgb, no useful labels)
+'ai_038_007': '*',
+
+# train ('hall', no useful labels: only wall)
+'ai_023_004': 'cam_01',
+
+# train ('staircase', no labels: almost all pixels are void)
+'ai_023_006': '*',
+
+# train ('hall', no useful labels: most pixels are void, some wall/floor)
+'ai_026_013': '*',
+
+# train ('hall', no useful labels: most pixels are void, some floor)
+'ai_026_018': '*',
+
+# test ('hotel lobby', no useful labels: almost all pixels are void)
+# as it is test, we do not exclude it
+# 'ai_044_004': 'cam_01',
+
+# train ('living room', missing textures in rgb)
+'ai_046_001': '*',
+
+# already excluded in csv (not part of public release)
+# 'ai_046_009': '*',
+
+# train ('bedroom', rgb does not really look realistic)
+'ai_048_004': '*',
+
+# valid ('lecture theater', missing textures in rgb)
+'ai_053_005': '*'
}
22 changes: 11 additions & 11 deletions nicr_scene_analysis_datasets/datasets/hypersim/prepare_dataset.py
@@ -98,13 +98,13 @@ def save_indexed_png(filepath, label, colormap):
return save_indexed_png_(filepath, label, colormap)


-def save_pcd(filepath, pointcloud):
-height, width = pointcloud.shape[:2]
+def save_pc(filepath, pc):
+height, width = pc.shape[:2]

file_extension = os.path.splitext(filepath)[1]

if file_extension == '.npz':
-np.savez_compressed(filepath, pointcloud)
+np.savez_compressed(filepath, pc)

elif file_extension == '.pcd':

@@ -122,10 +122,10 @@ def save_pcd(filepath, pointcloud):
f.write('DATA ascii\n')

# write point data
-for point in pointcloud.reshape(-1, 3):
+for point in pc.reshape(-1, 3):
f.write(f'{point[0]} {point[1]} {point[2]}\n')
else:
-raise ValueError('Filetype for pointcloud not understood')
+raise ValueError('Filetype for point cloud not understood')


def load_pc(scene_path, cam='cam_00', frame=0):
@@ -142,7 +142,7 @@

height, width = pc.shape[:2]

-# load extrinsic as the pointcloud is given in world coordinate frame
+# load extrinsic as the point cloud is given in world coordinate frame
extrinsics = load_extrinsics(scene_path, cam, frame)

rotation = Rotation.from_quat(
Expand All @@ -152,7 +152,7 @@ def load_pc(scene_path, cam='cam_00', frame=0):
transform[:3, :3] = rotation.as_matrix()
transform[:3, 3] = [extrinsics[t] for t in ['x', 'y', 'z']]

-# flatten pointcloud for transformation
+# flatten point cloud for transformation
pc = pc.reshape(-1, 3)

# change to homogeneous coordinates
@@ -171,7 +171,7 @@ def compute_point_cloud(scene_path, cam, frame, camera_params):
# code adapted from:
# https://github.com/apple/ml-hypersim/blob/main/contrib/mikeroberts3000/jupyter/01_casting_rays_that_match_hypersim_images.ipynb

-# note, we do not use the precomputed pointclouds as they are of type
+# note, we do not use the precomputed point clouds as they are of type
# float16 and, thus, not precise enough for our purposes

# distance image (note it's the distance along the ray, not the distance to
@@ -208,7 +208,7 @@
# rays = np.dot(M_cam_from_uv, uvs_2d.reshape(-1, 3).T).T
rays = np.dot(uvs_2d.reshape(-1, 3), M_cam_from_uv.T) # (h*w, 3)

-# compute pointcloud
+# compute point cloud
normed_rays = rays / np.linalg.norm(rays, ord=2, axis=-1, keepdims=True)
points_cam = normed_rays * distance_img_meters.reshape(-1, 1) # (h*w, 3)

@@ -622,7 +622,7 @@ def process_one_sample(
if k not in ('a', 'b')}
depth_intrinsics = intrinsics

-# get pointcloud -------------------------------------------------------
+# get point cloud ------------------------------------------------------
# note, we do not use the precomputed point cloud as it is of type
# float16 and, thus, not precise enough for our purposes (i.e., it
# introduces more artifacts in the back-projection)
@@ -633,7 +633,7 @@
camera_params=cp
)

-# get depth image as z coordinate from pointcloud ----------------------
+# get depth image as z coordinate from point cloud ---------------------
depth = points_cam[:, :, -1] # use z as depth
depth = depth * 1000 # convert to mm
depth[depth > np.iinfo('uint16').max] = 0 # clip to 65.535m
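As the hunk above notes, Hypersim stores the distance along the ray rather than a planar depth; the depth image is the z component of the unit ray scaled by that distance. A self-contained sketch with hypothetical pinhole intrinsics (a tiny 5x5 image with made-up focal length, not the actual Hypersim camera parameters):

```python
import numpy as np

# hypothetical pinhole intrinsics for a tiny 5x5 image
h, w = 5, 5
fx = fy = 100.0
cx, cy = 2.0, 2.0

# distance along the ray for each pixel (stand-in data: constant 2 m)
distance = np.full((h, w), 2.0)

# per-pixel ray direction in camera coordinates
u, v = np.meshgrid(np.arange(w, dtype=float), np.arange(h, dtype=float))
rays = np.stack([(u - cx) / fx, (v - cy) / fy, np.ones_like(u)], axis=-1)
normed_rays = rays / np.linalg.norm(rays, ord=2, axis=-1, keepdims=True)

# planar depth is the z component of the scaled unit ray
depth = normed_rays[..., 2] * distance

# depth equals the ray distance only at the principal point; everywhere
# else the unit ray's z component is below 1, so depth is smaller
```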
6 changes: 0 additions & 6 deletions nicr_scene_analysis_datasets/datasets/nyuv2/dataset.py
@@ -168,12 +168,6 @@ def _load_scene(self, idx: int) -> int:
self.split,
self.SCENE_CLASS_DIR,
f'{self._filenames[idx]}.txt')
-if not os.path.isfile(fp):
-    # catch common error
-    raise FileNotFoundError(
-        "Scene class file not found. Maybe the SUNRGBD matching was "
-        "not done yet."
-    )
with open(fp, 'r') as f:
class_str = f.readline()

@@ -21,6 +21,8 @@ def __init__(self, sunrgbd_path, nyuv2_path):
# seem to contain the cropping parameters, so we estimated them with the
# calc_crop function. Cropping parameters seemed to be constants, so
# we hardcoded them.
# Note the cropping parameters derived are also hardcoded in the NYUv2
# toolbox: toolbox_nyu_depth_v2/get_projection_mask.m
self.crop = (44, 40, 471, 601)

def get_sunrgbd_path_for_nyuv2(self, split, folder):
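The hardcoded crop above corresponds to the NYUv2 projection mask. Assuming the four values are `(top, left, bottom, right)` used as slice limits (an assumption for illustration; the exact semantics live in the toolbox's `get_projection_mask.m`), applying it looks like:

```python
import numpy as np

# crop parameters from above; assumed to be (top, left, bottom, right)
# used as exclusive slice limits -- verify against the toolbox code
CROP = (44, 40, 471, 601)


def apply_nyuv2_crop(image):
    top, left, bottom, right = CROP
    return image[top:bottom, left:right]


# a full-resolution NYUv2/Kinect frame is 480x640
frame = np.zeros((480, 640), dtype=np.uint16)
cropped = apply_nyuv2_crop(frame)
# cropped.shape == (427, 561)
```

Hardcoding the crop avoids re-estimating it per image; as the comment in the hunk notes, the parameters turned out to be constant across the dataset.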