From 22608a20766297da55fb47a7b1957d2174cc0547 Mon Sep 17 00:00:00 2001
From: Georgios Albanis
Date: Wed, 11 Sep 2024 08:44:48 +0300
Subject: [PATCH] refactor: Update streaming opt, rr viz & add missing monad conf

- to use vstack for arrays with more than 1 dimension and hstack for 1-dimensional arrays
- log frustums & kpts without images
---
 .../monads/generation/tensor/identity.yaml |  2 +-
 moai/serve/streaming_optimizer.py          | 19 ++++++++++++++---
 moai/visualization/rerun/image.py          | 21 ++++++++++---------
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/moai/conf/model/monads/generation/tensor/identity.yaml b/moai/conf/model/monads/generation/tensor/identity.yaml
index 6c070d1b..73f2829a 100644
--- a/moai/conf/model/monads/generation/tensor/identity.yaml
+++ b/moai/conf/model/monads/generation/tensor/identity.yaml
@@ -1,3 +1,3 @@
-# @package model.monads._name_
+# @package model.monads.identity
 
 _target_: moai.monads.generation.tensor.Identity
\ No newline at end of file

diff --git a/moai/serve/streaming_optimizer.py b/moai/serve/streaming_optimizer.py
index e5c0cad4..987daf5c 100644
--- a/moai/serve/streaming_optimizer.py
+++ b/moai/serve/streaming_optimizer.py
@@ -124,6 +124,16 @@ def initialize(self, context):
         except Exception as e:
             log.error(f"An error has occured while loading the trainer:\n{e}")
 
+    @staticmethod
+    def stack_based_on_dim(arr):
+        """
+        Stacks using vstack for arrays with more than 1 dimension and hstack for 1-dimensional arrays.
+        """
+        if np.ndim(arr[0]) > 1:
+            return np.vstack(arr)
+        else:
+            return np.hstack(arr)
+
     def handle(self, data: typing.Mapping[str, typing.Any], context: typing.Any):
         """
         Handle function responsible for returning an intermediate response.
@@ -206,12 +216,15 @@ def handle(self, data: typing.Mapping[str, typing.Any], context: typing.Any):
                     unit="value",
                     metric_type=MetricTypes.GAUGE,
                 )
-
+            # DEBUG: break after 10 batches
+            if batch_idx > 10:
+                break
         # call on epoch end callbacks
         call._call_callback_hooks(self.trainer, "on_train_epoch_end")
-        result = toolz.valmap(np.vstack, result)
+        # result = toolz.valmap(np.vstack, result)
+        result = toolz.valmap(self.stack_based_on_dim, result)
         # result and original input data should be available in the post processing handler
-        output = self.postprocess(toolz.merge(data, result))
+        output = self.postprocess(toolz.merge(td, result))
         # TODO: add post processing handler
         stop_time = time.time()
         self.context.metrics.add_time(

diff --git a/moai/visualization/rerun/image.py b/moai/visualization/rerun/image.py
index eeab4c5d..329ff97b 100644
--- a/moai/visualization/rerun/image.py
+++ b/moai/visualization/rerun/image.py
@@ -69,8 +69,8 @@ def multiframe_multiview_posed_image(
 
 
 def multicam_posed_image(
-    images: np.ndarray,  # assumes num_cams x [C, H, W]
     path: str,
+    images: np.ndarray = None,  # assumes num_cams x [C, H, W]
    poses: np.ndarray = None,  # assumes num_cams x [4, 4];
     intrinsics: np.ndarray = None,  # assumes num_cams x [3, 3];
     optimization_step: typing.Optional[int] = None,
@@ -85,8 +85,8 @@ def multicam_posed_image(
     elif iteration is not None:
         rr.set_time_sequence("iteration", iteration)
     # get number of cameras
-    num_cams = images.shape[0]
-    _, __, H, W = images.shape
+    num_cams = intrinsics.shape[0]
+    # _, __, H, W = images.shape
     for i in range(num_cams):
         rr.log(
             path + f"/cam_{i}",
@@ -103,13 +103,14 @@ def multicam_posed_image(
             ),
         )
         # log image
-        image = np.ascontiguousarray(images[i].transpose(-2, -1, 0) * 255).astype(
-            np.uint8
-        )
-        rr.log(
-            path + f"/cam_{i}",
-            rr.Image(image).compress(jpeg_quality=jpeg_quality),
-        )
+        if images is not None:
+            image = np.ascontiguousarray(images[i].transpose(-2, -1, 0) * 255).astype(
+                np.uint8
+            )
+            rr.log(
+                path + f"/cam_{i}",
+                rr.Image(image).compress(jpeg_quality=jpeg_quality),
+            )
 
 
 def posed_image(
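
Editor's note (not part of the patch): the snippet below is a minimal, self-contained sketch of the aggregation logic this commit introduces, i.e. stacking per-batch results with vstack when the arrays have more than one dimension and hstack when they are 1-D, applied over a result dictionary via toolz.valmap as in the handler. The `collected` dictionary and its shapes are hypothetical example data chosen only to illustrate the two branches.

```python
# Sketch of the stacking behaviour added in streaming_optimizer.py.
# `collected` is hypothetical per-batch output; shapes are illustrative.
import numpy as np
import toolz


def stack_based_on_dim(arr):
    """Stack with vstack for >1-D arrays and hstack for 1-D arrays."""
    if np.ndim(arr[0]) > 1:
        return np.vstack(arr)
    return np.hstack(arr)


# Two batches of outputs accumulated during the streaming optimization loop.
collected = {
    "loss": [np.zeros(4), np.zeros(4)],                          # 1-D per batch
    "keypoints": [np.zeros((4, 17, 2)), np.zeros((4, 17, 2))],   # N-D per batch
}

result = toolz.valmap(stack_based_on_dim, collected)
assert result["loss"].shape == (8,)            # hstack concatenates 1-D metrics
assert result["keypoints"].shape == (8, 17, 2) # vstack concatenates along the batch axis
```

Using plain `np.vstack` for every entry (the previous behaviour) would promote 1-D metrics such as `loss` to shape (2, 4) instead of concatenating them, which is why the dimension check was added.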