v054 release (#6)

TUI-NICR · Jun 8, 2023 · dd4eacc · dd4eacc
1 parent 0baf3cf
commit dd4eacc
Show file tree

Hide file tree

Showing 11 changed files with 340 additions and 103 deletions.
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -34,5 +34,19 @@
             "env": {
             },
         },
+        {
+            "name": "create scennetrgbd",
+            "type": "python",
+            "request": "launch",
+            "module": "nicr_scene_analysis_datasets.datasets.scenenetrgbd.prepare_dataset",
+            "console": "integratedTerminal",
+            "args": [
+                "/datasets_nas/nicr_scene_analysis_datasets/version_test/scenenetrgbd",
+                "/datasets_nas/segmentation/SceneNetRGBD",
+                "--n-random-views-to-include-train", "3",
+                "--n-random-views-to-include-valid", "6",
+                "--force-at-least-n-classes-in-view", "4",
+            ],
+        },
     ]
 }
diff --git a/README.md b/README.md
@@ -13,11 +13,11 @@ Currently, this packages features the following datasets and annotations:
 | [Hypersim](https://machinelearning.apple.com/research/hypersim)       | v052/v052      | RGB-D     | &#10003; | &#10003; | (&#10003;)\*\* | &#10003; | (&#10003;)\*\*\*\* | &#10003; | &#10003;   | &#10003;   |
 | [NYUv2](https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html)     | v030/v040      | RGB-D     | &#10003; | &#10003; | &#10003;\*\*\* | &#10003; | &#10003;           |          |            |            |
 | [ScanNet](http://www.scan-net.org/)                                   | v051/v051      | RGB-D     | &#10003; | &#10003; |                | &#10003; |                    |          | &#10003;   | &#10003;   |
-| [SceneNet RGB-D](https://robotvault.bitbucket.io/scenenet-rgbd.html)  | v030/v040      | RGB-D     | &#10003; |          |                |          |                    |          |            |            |
+| [SceneNet RGB-D](https://robotvault.bitbucket.io/scenenet-rgbd.html)  | v054/v054      | RGB-D     | &#10003; | &#10003; |                | &#10003; |                    |          |            |            |
 | [SUNRGB-D](https://rgbd.cs.princeton.edu/)                            | v030/v040      | RGB-D     | &#10003; | &#10003; |   &#10003;     | &#10003; |                    | &#10003; | &#10003;   | &#10003;   |
 
 \* Both depth and disparity are available.  
-\*\* Orientations are available but not consistent for instances of the same semantic class (see Hypersim).  
+\*\* Orientations are available but not consistent for instances within a semantic class (see Hypersim).  
 \*\*\* Annotated by hand in 3D for instances of some relevant semantic classes.  
 \*\*\*\* As of Nov 2022, [precomputed normals](https://cs.nyu.edu/~deigen/dnl/normals_gt.tgz) are not publicly available any longer. We are trying to reach the authors.
 
@@ -74,8 +74,8 @@ in IEEE International Conference on Robotics and Automation (ICRA), pp. 9221-922
 ```bibtex
 @inproceedings{semanticndtmapping2022icra,
   author={Seichter, Daniel and Langer, Patrick and Wengefeld, Tim and Lewandowski, Benjamin and H{\"o}chemer, Dominik and Gross, Horst-Michael},
-  booktitle={2022 International Conference on Robotics and Automation (ICRA)}, 
-  title={Efficient and Robust Semantic Mapping for Indoor Environments}, 
+  booktitle={2022 International Conference on Robotics and Automation (ICRA)},
+  title={Efficient and Robust Semantic Mapping for Indoor Environments},
   year={2022},
   pages={9221-9227},
   doi={10.1109/ICRA46639.2022.9812205}}
@@ -128,7 +128,7 @@ Please follow the instructions given in the respective dataset folder to prepare
 
 We provide several command-line entry points for common tasks:
   - `nicr_sa_prepare_dataset`: prepare a dataset for usage
-  - `nicr_sa_prepare_labeled_point_clouds`: create labeled point clouds as ply files similar to ScanNet benchmark 
+  - `nicr_sa_prepare_labeled_point_clouds`: create labeled point clouds as ply files similar to ScanNet benchmark
   - `nicr_sa_depth_viewer`: viewer for depth images
   - `nicr_sa_semantic_instance_viewer`: viewer for semantic and instance (and panoptic) annotations
   - `nicr_sa_labeled_pc_viewer`: viewer for labeled point clouds
@@ -238,12 +238,22 @@ For further details, we refer to the usage in our [EMSANet repository](https://g
 The dataset can be used as an iterator (detectron2 usually does this) and can then be mapped with the custom mappers to generate the correct layout of the data.
 
 ## Changelog
+**Version 0.5.4 (Jun 07, 2023)**
+- SUNRGB-D:
+  - fix for `depth_force_mm=True`:
+    - divide by 8 (shift right by 3 bits) instead of dividing by 10
+    - updated depth stats
+    - for more details, see notes in [nicr_scene_analysis_datasets/datasets/sunrgbd/dataset.py](nicr_scene_analysis_datasets/datasets/sunrgbd/dataset.py#L213)
+    - note, for `depth_force_mm=False`, nothing changed, everything is as before (EMSANet / EMSAFormer)
+- SceneNet RGB-D: add support for instances and scene classes
+- add `identifier2idx()` to base dataset class to search for samples by identifier
+
 **Version 0.5.3 (Mar 31, 2023)**
 - *no dataset preparation related changes*
 - minor changes to `nicr_sa_prepare_labeled_point_clouds` and `nicr_sa_labeled_pc_viewer`
 
 **Version 0.5.2 (Mar 28, 2023)**
-- hypersim: change instance encoding: do not view G and B channel as uint16 use bit shifting instead
+- Hypersim: change instance encoding: do not view G and B channels as uint16; use bit shifting instead
 - add new scripts and update entry points:
   - `nicr_sa_prepare_dataset`: prepare a dataset (replaces `python -m ...` calls)
   - `nicr_sa_prepare_labeled_point_clouds`: create labeled point clouds as ply files similar to ScanNet benchmark
@@ -253,7 +263,7 @@ The dataset can be used as an iterator (detectron2 usually does this) and can th
 
 **Version 0.5.1 (Mar 01, 2023)**
 - refactor MIRA reader to support multiple datasets, create an abstract base class
-- ScanNet: 
+- ScanNet:
   - blacklist broken frames due to invalid extrinsic parameters (see datasets/scannet/scannet.py)
 - Hypersim:
   - IMPORTANT: version 0.5.1 is not compatible with earlier versions of the dataset
@@ -274,7 +284,7 @@ The dataset can be used as an iterator (detectron2 usually does this) and can th
 
 **Version 0.4.1 (Nov 12, 2022)**
 - *no dataset preparation related changes*
-- make normal extraction for NYUv2 dataset optional as the [precomputed normals](https://cs.nyu.edu/~deigen/dnl/normals_gt.tgz) are not publicly available any longer 
+- make normal extraction for NYUv2 dataset optional as the [precomputed normals](https://cs.nyu.edu/~deigen/dnl/normals_gt.tgz) are not publicly available any longer
 
 **Version 0.4.0 (July 15, 2022)**
 - *no dataset preparation related changes*

diff --git a/nicr_scene_analysis_datasets/d2/_auto_init.py b/nicr_scene_analysis_datasets/d2/_auto_init.py
@@ -48,7 +48,7 @@
 register_dataset_to_d2(
     name_prefix='scenenetrgbd',
     dataset_class=SceneNetRGBD,
-    sample_keys=('identifier', 'rgb', 'semantic')
+    sample_keys=('identifier', 'rgb', 'semantic', 'instance')
 )
 register_dataset_to_d2(
     name_prefix='sunrgbd',

diff --git a/nicr_scene_analysis_datasets/dataset_base/_base_dataset.py b/nicr_scene_analysis_datasets/dataset_base/_base_dataset.py
@@ -168,6 +168,37 @@ def config(self) -> DatasetConfig:
     def get_available_sample_keys(split: str) -> Tuple[str]:
         pass
 
+    def identifier2idx(
+        self,
+        identifier: SampleIdentifier,
+        ensure_single_match: bool = True
+    ) -> int:
+        # this function might be useful for some use cases, however, as it
+        # always iterates all identifiers it is not very efficient, do not
+        # call it too often
+        idx = -1
+        for i in range(len(self)):
+            if self._load_identifier(i) == identifier:
+                if not ensure_single_match:
+                    # first match is enough
+                    return i
+                else:
+                    if -1 == idx:
+                        # first match
+                        idx = i
+                    else:
+                        # another match, raise error
+                        raise ValueError(
+                            f"Found at least two matches for identifier: "
+                            f"'{identifier}, indices: {idx} and {i}'."
+                        )
+
+        if -1 != idx:
+            # we got a single match, return it
+            return idx
+
+        raise ValueError(f"Could not find identifier: '{identifier}'")
+
     @abc.abstractmethod
     def _load_identifier(self, idx: int) -> SampleIdentifier:
         pass

diff --git a/nicr_scene_analysis_datasets/datasets/scenenetrgbd/dataset.py b/nicr_scene_analysis_datasets/datasets/scenenetrgbd/dataset.py
@@ -2,8 +2,9 @@
 """
 .. codeauthor:: Daniel Seichter <daniel.seichter@tu-ilmenau.de>
 """
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, Optional, Tuple, Union
 
+import json
 import os
 
 import cv2
@@ -27,6 +28,7 @@ def __init__(
         cameras: Optional[Tuple[str]] = None,
         depth_mode: str = 'refined',
         semantic_n_classes: int = 13,
+        scene_use_indoor_domestic_labels: bool = False,
         **kwargs: Any
     ) -> None:
         super().__init__(
@@ -44,6 +46,7 @@ def __init__(
         self._split = split
         self._depth_mode = depth_mode
         self._cameras = self.CAMERAS
+        self._scene_use_indoor_domestic_labels = scene_use_indoor_domestic_labels
 
         # cameras
         if cameras is None:
@@ -60,24 +63,26 @@ def __init__(
             assert os.path.exists(dataset_path), dataset_path
             self._dataset_path = dataset_path
 
-            # load file lists
-            def _loadtxt(fn):
-                return np.loadtxt(os.path.join(self._dataset_path, fn),
-                                  dtype=str)
-
-            self._files = {
-                'rgb': _loadtxt(f'{self._split}_rgb.txt'),
-                'depth': _loadtxt(f'{self._split}_depth.txt'),
-                'label': _loadtxt(f'{self._split}_labels_{self._semantic_n_classes}.txt')
-            }
-            assert all(len(li) == len(self._files['rgb'])
-                       for li in self._files.values())
+            # load file list
+            fp = os.path.join(self._dataset_path,
+                              self.SPLIT_FILELIST_FILENAMES[self._split])
+            with open(fp, 'r') as f:
+                self._files = f.read().splitlines()
+
         elif not self._disable_prints:
             print(f"Loaded SceneNetRGBD dataset without files")
 
+        if self._scene_use_indoor_domestic_labels:
+            # use remapped scene labels
+            scene_label_list = self.SCENE_LABEL_LIST_INDOOR_DOMESTIC
+        else:
+            # use original scene labels
+            scene_label_list = self.SCENE_LABEL_LIST
+
         # build config object
         self._config = build_dataset_config(
             semantic_label_list=self.SEMANTIC_LABEL_LIST,
+            scene_label_list=scene_label_list,
             depth_stats=self.TRAIN_SPLIT_DEPTH_STATS
         )
 
@@ -101,34 +106,62 @@ def depth_mode(self) -> str:
         return self._depth_mode
 
     def __len__(self) -> int:
-        return len(self._files['rgb'])
+        return len(self._files)
 
     @staticmethod
     def get_available_sample_keys(split: str) -> Tuple[str]:
         return SceneNetRGBDMeta.SPLIT_SAMPLE_KEYS[split]
 
-    def _load(self, directory, filename) -> np.ndarray:
+    def _load(
+        self,
+        directory: str,
+        idx: int,
+        extension: str = '.png'
+    ) -> Union[str, np.ndarray]:
+        # determine filepath
         fp = os.path.join(self._dataset_path,
                           self.split,
                           directory,
-                          filename)
-        img = cv2.imread(fp, cv2.IMREAD_UNCHANGED)
-        if img is None:
-            raise IOError(f"Unable to load image: '{fp}'")
-        if img.ndim == 3:
-            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+                          f'{self._files[idx]}{extension}')
+
+        # load data
+        if '.txt' == extension:
+            with open(fp, 'r') as f:
+                data = f.readline()
+        else:
+            # default load using OpenCV
+            data = cv2.imread(fp, cv2.IMREAD_UNCHANGED)
+            if data is None:
+                raise IOError(f"Unable to load image: '{fp}'")
+            if data.ndim == 3:
+                data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
 
-        return img
+        return data
 
     def _load_rgb(self, idx: int) -> np.ndarray:
-        return self._load(self.RGB_DIR, self._files['rgb'][idx])
+        return self._load(self.RGB_DIR, idx, '.jpg')
 
     def _load_depth(self, idx: int) -> np.ndarray:
-        return self._load(self.DEPTH_DIR, self._files['depth'][idx])
+        return self._load(self.DEPTH_DIR, idx)
 
     def _load_identifier(self, idx: int) -> Tuple[str]:
-        fn, _ = os.path.splitext(self._files['rgb'][idx])
+        fn = self._files[idx]
         return SampleIdentifier(os.path.normpath(fn).split(os.sep))
 
     def _load_semantic(self, idx: int) -> np.ndarray:
-        return self._load(self.SEMANTIC_13_DIR, self._files['label'][idx])
+        return self._load(self.SEMANTIC_13_DIR, idx)
+
+    def _load_instance(self, idx: int) -> np.ndarray:
+        return self._load(self.INSTANCES_DIR, idx).astype('int32')
+
+    def _load_scene(self, idx: int) -> int:
+        class_str = self._load(self.SCENE_CLASS_DIR, idx, '.txt')
+
+        class_idx = self.SCENE_LABEL_LIST.index(class_str)
+
+        if self._scene_use_indoor_domestic_labels:
+            # map class to indoor domestic environment labels
+            mapping = self.SCENE_LABEL_IDX_TO_SCENE_LABEL_INDOOR_DOMESTIC_IDX
+            class_idx = mapping[class_idx]
+
+        return class_idx