From 499c7caeebe0763ceefb00fdf842e327c9e303c2 Mon Sep 17 00:00:00 2001 From: klemen1999 Date: Sun, 7 Jan 2024 20:18:11 +0100 Subject: [PATCH 01/10] internal support for batch transforms --- luxonis_ml/data/augmentations.py | 30 +- luxonis_ml/data/utils/aug_utils/__init__.py | 5 + .../data/utils/aug_utils/batch_compose.py | 385 ++++++++++++++++++ .../data/utils/aug_utils/batch_processors.py | 136 +++++++ .../data/utils/aug_utils/batch_transform.py | 56 +++ .../data/utils/aug_utils/batch_utils.py | 105 +++++ luxonis_ml/data/utils/aug_utils/mosaic4.py | 340 ++++++++++++++++ 7 files changed, 1047 insertions(+), 10 deletions(-) create mode 100644 luxonis_ml/data/utils/aug_utils/__init__.py create mode 100644 luxonis_ml/data/utils/aug_utils/batch_compose.py create mode 100644 luxonis_ml/data/utils/aug_utils/batch_processors.py create mode 100644 luxonis_ml/data/utils/aug_utils/batch_transform.py create mode 100644 luxonis_ml/data/utils/aug_utils/batch_utils.py create mode 100644 luxonis_ml/data/utils/aug_utils/mosaic4.py diff --git a/luxonis_ml/data/augmentations.py b/luxonis_ml/data/augmentations.py index 02832884..d53164e6 100644 --- a/luxonis_ml/data/augmentations.py +++ b/luxonis_ml/data/augmentations.py @@ -14,9 +14,11 @@ KeypointType, ) +import luxonis_ml.data.utils.aug_utils as A_batched from luxonis_ml.enums import LabelType from luxonis_ml.utils.registry import Registry + AUGMENTATIONS = Registry(name="augmentations") @@ -38,7 +40,7 @@ def _parse_cfg( image_size: List[int], augmentations: List[Dict[str, Any]], keep_aspect_ratio: bool = True, - ) -> Tuple[A.BatchCompose, A.Compose]: + ) -> Tuple[A_batched.BatchCompose, A.Compose]: """Parses provided config and returns Albumentations BatchedCompose object and Compose object for default transforms. @@ -71,16 +73,16 @@ def _parse_cfg( pixel_augs.append(curr_aug) elif isinstance(curr_aug, A.DualTransform): spatial_augs.append(curr_aug) - elif isinstance(curr_aug, A.BatchBasedTransform): + elif isinstance(curr_aug, A_batched.BatchBasedTransform): self.is_batched = True self.aug_batch_size = max(self.aug_batch_size, curr_aug.n_tiles) batched_augs.append(curr_aug) # NOTE: always perform resize last spatial_augs.append(resize) - batch_transform = A.BatchCompose( + batch_transform = A_batched.BatchCompose( [ - A.ForEach(pixel_augs), + A_batched.ForEach(pixel_augs), *batched_augs, ], bbox_params=A.BboxParams( @@ -217,7 +219,15 @@ def __call__( def prepare_img_annotations( self, annotations: Dict[LabelType, np.ndarray], ih: int, iw: int - ) -> Tuple[np.ndarray]: + ) -> Tuple[ + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + ]: """Prepare annotations to be compatible with albumentations. @type annotations: Dict[LabelType, np.ndarray] @@ -226,7 +236,7 @@ def prepare_img_annotations( @param ih: Input image height @type iw: int @param iw: Input image width - @rtype: Tuple[np.ndarray] + @rtype: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray] @return: Annotations in albumentations format """ @@ -272,7 +282,7 @@ def post_transform_process( ns: int, nk: int, filter_kpts_by_bbox: bool, - ) -> Tuple[np.ndarray]: + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Postprocessing of albumentations output to LuxonisLoader format. @type transformed_data: Dict[str, np.ndarray] @@ -284,7 +294,7 @@ def post_transform_process( @type filter_kpts_by_bbox: bool @param filter_kpts_by_bbox: If True removes keypoint instances if its bounding box was removed. 
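For orientation, a minimal sketch of how the batched pipeline assembled above is invoked; the transform choices, sizes, and `label_fields` names are illustrative assumptions, not values taken from this patch:

    import albumentations as A
    import numpy as np

    import luxonis_ml.data.utils.aug_utils as A_batched

    # Per-image (pixel) transforms run inside ForEach; multi-image transforms
    # sit directly in the BatchCompose, mirroring the _parse_cfg wiring above.
    batch_transform = A_batched.BatchCompose(
        [
            A_batched.ForEach([A.Blur(p=0.5)]),
            A_batched.Mosaic4(out_height=256, out_width=256, out_batch_size=1, p=1.0),
        ],
        bbox_params=A.BboxParams(format="coco", label_fields=["classes_batch"]),
    )

    imgs = [np.zeros((256, 256, 3), np.uint8) for _ in range(4)]
    out = batch_transform(
        image_batch=imgs,
        bboxes_batch=[[] for _ in imgs],
        classes_batch=[[] for _ in imgs],
    )
    assert len(out["image_batch"]) == 1  # four input tiles mosaicked into one image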
- @rtype: Tuple[np.ndarray] + @rtype: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray] @return: Postprocessed annotations """ @@ -809,7 +819,7 @@ def _out_of_bounds(self, value: float, min_limit: float, max_limit: float) -> bo @AUGMENTATIONS.register_module(name="Mosaic4") -class DeterministicMosaic4(A.Mosaic4): +class DeterministicMosaic4(A_batched.Mosaic4): def __init__( self, out_height: int, @@ -886,7 +896,7 @@ def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, A @AUGMENTATIONS.register_module() -class MixUp(A.BatchBasedTransform): +class MixUp(A_batched.BatchBasedTransform): def __init__( self, alpha: Union[float, Tuple[float, float]] = 0.5, diff --git a/luxonis_ml/data/utils/aug_utils/__init__.py b/luxonis_ml/data/utils/aug_utils/__init__.py new file mode 100644 index 00000000..16f11108 --- /dev/null +++ b/luxonis_ml/data/utils/aug_utils/__init__.py @@ -0,0 +1,5 @@ +from .batch_compose import BatchCompose, ForEach, Repeat +from .batch_transform import BatchBasedTransform +from .mosaic4 import Mosaic4 + +__all__ = ["BatchCompose", "ForEach", "Repeat", "BatchBasedTransform", "Mosaic4"] diff --git a/luxonis_ml/data/utils/aug_utils/batch_compose.py b/luxonis_ml/data/utils/aug_utils/batch_compose.py new file mode 100644 index 00000000..454934ef --- /dev/null +++ b/luxonis_ml/data/utils/aug_utils/batch_compose.py @@ -0,0 +1,385 @@ +import random +import numpy as np +from typing import Optional, Union, Dict, Any, List, cast +from albumentations.core.composition import ( + BaseCompose, + TransformsSeqType, + get_always_apply, +) +from albumentations.core.bbox_utils import ( + DataProcessor, + BboxProcessor, + BboxParams, +) +from albumentations.core.keypoints_utils import KeypointParams, KeypointsProcessor +from albumentations.core.utils import get_shape +from .batch_processors import BboxBatchProcessor, KeypointsBatchProcessor +from .batch_utils import * + + +class Compose(BaseCompose): + """Compose transforms and handle all transformations regarding bounding boxes + + Args: + transforms (list): list of transformations to compose. + bbox_params (BboxParams): Parameters for bounding boxes transforms + keypoint_params (KeypointParams): Parameters for keypoints transforms + additional_targets (dict): Dict with keys - new target name, values - old target name. ex: {'image2': 'image'} + p (float): probability of applying all list of transforms. Default: 1.0. + is_check_shapes (bool): If True shapes consistency of images/mask/masks would be checked on each call. If you + would like to disable this check - pass False (do it only if you are sure in your data consistency). 
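Because `__call__` below rejects positional arguments, targets are always passed by name. A minimal call against this vendored `Compose` (transform choice and shapes are illustrative):

    import albumentations as A
    import numpy as np

    aug = Compose(
        [A.HorizontalFlip(p=1.0)],
        bbox_params=A.BboxParams(format="pascal_voc", label_fields=["classes"]),
    )
    out = aug(
        image=np.zeros((100, 200, 3), np.uint8),
        bboxes=[(10, 20, 50, 80)],
        classes=[0],
    )
    print(out["bboxes"])  # approximately [(150.0, 20.0, 190.0, 80.0)] after the flip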
+ """ + + def __init__( + self, + transforms: TransformsSeqType, + bbox_params: Optional[Union[dict, "BboxParams"]] = None, + keypoint_params: Optional[Union[dict, "KeypointParams"]] = None, + additional_targets: Optional[Dict[str, str]] = None, + p: float = 1.0, + is_check_shapes: bool = True, + ): + super(Compose, self).__init__(transforms, p) + + self.processors: Dict[str, DataProcessor] = {} + + if bbox_params: + if isinstance(bbox_params, dict): + b_params = BboxParams(**bbox_params) + elif isinstance(bbox_params, BboxParams): + b_params = bbox_params + else: + raise ValueError( + "unknown format of bbox_params, please use `dict` or `BboxParams`" + ) + self.processors["bboxes"] = self._get_bbox_processor( + b_params, additional_targets + ) + + if keypoint_params: + if isinstance(keypoint_params, dict): + k_params = KeypointParams(**keypoint_params) + elif isinstance(keypoint_params, KeypointParams): + k_params = keypoint_params + else: + raise ValueError( + "unknown format of keypoint_params, please use `dict` or `KeypointParams`" + ) + self.processors["keypoints"] = self._get_keypoints_processor( + k_params, additional_targets + ) + + if additional_targets is None: + additional_targets = {} + + self.additional_targets = additional_targets + + for proc in self.processors.values(): + proc.ensure_transforms_valid(self.transforms) + + self.add_targets(additional_targets) + + self.is_check_args = True + self._disable_check_args_for_transforms(self.transforms) + + self.is_check_shapes = is_check_shapes + + def _get_bbox_processor(self, b_params, additional_targets): + return BboxProcessor(b_params, additional_targets) + + def _get_keypoints_processor(self, k_params, additional_targets): + return KeypointsProcessor(k_params, additional_targets) + + @staticmethod + def _disable_check_args_for_transforms(transforms: TransformsSeqType) -> None: + for transform in transforms: + if isinstance(transform, BaseCompose): + Compose._disable_check_args_for_transforms(transform.transforms) + if isinstance(transform, Compose): + transform._disable_check_args() + + def _disable_check_args(self) -> None: + self.is_check_args = False + + def __call__(self, *args, force_apply: bool = False, **data) -> Dict[str, Any]: + if args: + raise KeyError( + "You have to pass data to augmentations as named arguments, for example: aug(image=image)" + ) + if self.is_check_args: + self._check_args(self.additional_targets, **data) + assert isinstance( + force_apply, (bool, int) + ), "force_apply must have bool or int type" + need_to_run = force_apply or random.random() < self.p + for p in self.processors.values(): + p.ensure_data_valid(data) + transforms = ( + self.transforms if need_to_run else get_always_apply(self.transforms) + ) + + check_each_transform = any( + getattr(item.params, "check_each_transform", False) + for item in self.processors.values() + ) + + for p in self.processors.values(): + p.preprocess(data) + + for _, t in enumerate(transforms): + data = t(**data) + if check_each_transform: + data = self._check_data_post_transform(data) + + data = self._make_targets_contiguous( + data + ) # ensure output targets are contiguous + + for p in self.processors.values(): + p.postprocess(data) + + return data + + def _check_data_post_transform(self, data: Dict[str, Any]) -> Dict[str, Any]: + rows, cols = get_shape(data["image"]) + + for p in self.processors.values(): + if not getattr(p.params, "check_each_transform", False): + continue + + for data_name in p.data_fields: + data[data_name] = p.filter(data[data_name], 
rows, cols) + return data + + def _to_dict(self) -> Dict[str, Any]: + dictionary = super(Compose, self)._to_dict() + bbox_processor = self.processors.get("bboxes") + keypoints_processor = self.processors.get("keypoints") + dictionary.update( + { + "bbox_params": bbox_processor.params._to_dict() + if bbox_processor + else None, # skipcq: PYL-W0212 + "keypoint_params": keypoints_processor.params._to_dict() # skipcq: PYL-W0212 + if keypoints_processor + else None, + "additional_targets": self.additional_targets, + "is_check_shapes": self.is_check_shapes, + } + ) + return dictionary + + def get_dict_with_id(self) -> Dict[str, Any]: + dictionary = super().get_dict_with_id() + bbox_processor = self.processors.get("bboxes") + keypoints_processor = self.processors.get("keypoints") + dictionary.update( + { + "bbox_params": bbox_processor.params._to_dict() + if bbox_processor + else None, # skipcq: PYL-W0212 + "keypoint_params": keypoints_processor.params._to_dict() # skipcq: PYL-W0212 + if keypoints_processor + else None, + "additional_targets": self.additional_targets, + "params": None, + "is_check_shapes": self.is_check_shapes, + } + ) + return dictionary + + def _check_args(self, additional_targets, **kwargs) -> None: + checked_single = ["image", "mask"] + checked_multi = ["masks"] + check_bbox_param = ["bboxes"] + # ["bboxes", "keypoints"] could be almost any type, no need to check them + shapes = [] + for data_name, data in kwargs.items(): + internal_data_name = additional_targets.get(data_name, data_name) + if internal_data_name in checked_single: + if not isinstance(data, np.ndarray): + raise TypeError("{} must be numpy array type".format(data_name)) + shapes.append(data.shape[:2]) + if internal_data_name in checked_multi: + if data is not None: + if not isinstance(data[0], np.ndarray): + raise TypeError( + "{} must be list of numpy arrays".format(data_name) + ) + shapes.append(data[0].shape[:2]) + if ( + internal_data_name in check_bbox_param + and self.processors.get("bboxes") is None + ): + raise ValueError( + "bbox_params must be specified for bbox transformations" + ) + + if self.is_check_shapes and shapes and shapes.count(shapes[0]) != len(shapes): + raise ValueError( + "Height and Width of image, mask or masks should be equal. You can disable shapes check " + "by setting a parameter is_check_shapes=False of Compose class (do it only if you are sure " + "about your data consistency)." + ) + + def _make_targets_contiguous(self, data: Dict[str, Any]) -> Dict[str, Any]: + result = {} + for key, value in data.items(): + if isinstance(value, np.ndarray): + value = np.ascontiguousarray(value) + result[key] = value + return result + + +class BatchCompose(Compose): + """Compose designed to handle the multi-image transforms + The contents can be a subclass of `BatchBasedTransform` or + other transforms enclosed by ForEach container. + All targets' names should have the suffix "_batch", ex ("image_batch", "bboxes_batch"). + Note this nameing rule is applied to the `label_fields` of the `BboxParams` and the `KeypointsParams`. + Args: + transforms (list): list of transformations to compose. + bbox_params (BboxParams): Parameters for bounding boxes transforms + keypoint_params (KeypointParams): Parameters for keypoints transforms + additional_targets (dict): Dict with keys - new target name, values - old target name. ex: {'image2': 'image'} + p (float): probability of applying all list of transforms. Default: 1.0. 
+ is_check_shapes (bool): If True shapes consistency of images/mask/masks would be checked on each call. If you + would like to disable this check - pass False (do it only if you are sure in your data consistency). + """ + + def __init__( + self, + transforms: TransformsSeqType, + bbox_params: Optional[Union[dict, "BboxParams"]] = None, + keypoint_params: Optional[Union[dict, "KeypointParams"]] = None, + additional_targets: Optional[Dict[str, str]] = None, + p: float = 1.0, + is_check_shapes: bool = True, + ): + super(BatchCompose, self).__init__( + transforms=transforms, + bbox_params=bbox_params, + keypoint_params=keypoint_params, + additional_targets=additional_targets, + p=p, + is_check_shapes=is_check_shapes, + ) + + def _get_bbox_processor(self, b_params, additional_targets): + return BboxBatchProcessor(b_params, additional_targets) + + def _get_keypoints_processor(self, k_params, additional_targets): + return KeypointsBatchProcessor(k_params, additional_targets) + + def _check_data_post_transform( + self, batched_data: Dict[str, Any] + ) -> Dict[str, Any]: + datalist = batch2list(batched_data) + processed = [] + for data in datalist: + rows, cols = get_shape(data["image"]) + for p in self.processors.values(): + if not getattr(p.params, "check_each_transform", False): + continue + p = cast(Union[BboxBatchProcessor, KeypointsBatchProcessor], p) + for data_name in p.item_processor.data_fields: + data[data_name] = p.filter(data[data_name], rows, cols) + processed.append(data) + return list2batch(processed) + + def _check_args(self, additional_targets, **kwargs) -> None: + datalist = batch2list(kwargs) + unbatched_targets = { + to_unbatched_name(k): to_unbatched_name(v) + for k, v in additional_targets.items() + } + for data in datalist: + super(BatchCompose, self)._check_args(unbatched_targets, **data) + + def _make_targets_contiguous(self, batched_data: Dict[str, Any]) -> Dict[str, Any]: + datalist = batch2list(batched_data) + if len(datalist) == 0: + return batched_data + processed = [] + for data in datalist: + data = super(BatchCompose, self)._make_targets_contiguous(data) + processed.append(data) + return list2batch(processed) + + +class ForEach(BaseCompose): + """Apply transforms for each batch element + This expects batched input and can be contained by the `BatchCompose`. + This can contains any other transforms but a subclass of the `BatchBasedTransform`. + Internally, this container works as the following way: + ``` + for i in range(batch_size): + for t in transforms: + processed = t({"image": data["image_batch"][i], "bboxes": data["bboxes_batch"][i], ...}) + data["image_batch"][i] = processed["image"] + ... + return data + ``` + """ + + def __init__(self, transforms: TransformsSeqType, p: float = 0.5): + super().__init__(transforms, p) + + def __call__( + self, *args, force_apply: bool = False, **batched_data + ) -> Dict[str, List]: + datalist = batch2list(batched_data) + processed = [] + for data in datalist: + for t in self.transforms: + data = t(force_apply=force_apply, **data) + processed.append(data) + batched_data = list2batch(processed) + return batched_data + + def add_targets(self, additional_targets: Optional[Dict[str, str]]) -> None: + if additional_targets: + unbatched_targets = { + to_unbatched_name(k): to_unbatched_name(v) + for k, v in additional_targets.items() + } + for t in self.transforms: + t.add_targets(unbatched_targets) + + +class Repeat(BaseCompose): + """Apply transforms repeatedly and concatenates the output batches. 
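These compose containers all shuttle between batched and per-item target dicts through the helpers in `batch_utils`; a small round-trip sketch (array contents arbitrary, module path as introduced in this patch):

    import numpy as np

    from luxonis_ml.data.utils.aug_utils.batch_utils import batch2list, list2batch

    batched = {
        "image_batch": [np.zeros((4, 4, 3), np.uint8), np.ones((4, 4, 3), np.uint8)],
        "bboxes_batch": [[(0, 0, 2, 2, 0)], []],
    }
    items = batch2list(batched)  # per-sample dicts with the "_batch" suffix stripped
    assert items[0]["bboxes"] == [(0, 0, 2, 2, 0)]
    assert sorted(list2batch(items)) == ["bboxes_batch", "image_batch"]  # round trip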
+ This expects batched input and can be contained by the `BatchCompose`. + The contained transforms should be a subbclass of the `BatchBasedTransform`. + Internally, this container works as the following way: + ``` + out_data = {"image_batch": [], ...} + for _ in range(n): + data = in_data + for t in transforms: + data = t(**data) + out_data["image_batch"] += data["image_batch"] + ... + return out_data + ``` + Note: This class assumes that each transform does not modify the input data. + """ + + def __init__(self, transforms: TransformsSeqType, n: int, p: float = 0.5): + super().__init__(transforms, p) + if n <= 0: + raise ValueError("Repetition `n` should be larger than 0") + self.n = n + + def __call__( + self, *args, force_apply: bool = False, **batched_data + ) -> Dict[str, List]: + processed = [] + for _ in range(self.n): + image = batched_data["image_batch"][0].copy() + data = batched_data + for t in self.transforms: + data = t(force_apply=force_apply, **data) + processed.append(data) + assert np.all(batched_data["image_batch"][0] == image) + return concat_batches(processed) diff --git a/luxonis_ml/data/utils/aug_utils/batch_processors.py b/luxonis_ml/data/utils/aug_utils/batch_processors.py new file mode 100644 index 00000000..359eef93 --- /dev/null +++ b/luxonis_ml/data/utils/aug_utils/batch_processors.py @@ -0,0 +1,136 @@ +import copy +from typing import Optional, Dict, Any, Sequence +from albumentations.core.utils import DataProcessor +from albumentations.core.bbox_utils import BboxParams, BboxProcessor +from albumentations.core.keypoints_utils import KeypointParams, KeypointsProcessor +from .batch_utils import * + + +class BboxBatchProcessor(DataProcessor): + def __init__( + self, params: BboxParams, additional_targets: Optional[Dict[str, str]] = None + ): + super().__init__(params, additional_targets) + item_params = copy.deepcopy(params) + if item_params.label_fields is not None: + label_fields = item_params.label_fields + item_params.label_fields = [ + to_unbatched_name(field) for field in label_fields + ] + self.item_processor = BboxProcessor(item_params, additional_targets) + + @property + def default_data_name(self) -> str: + return "bboxes_batch" + + def ensure_data_valid(self, data: Dict[str, Any]) -> None: + for item in batch2list(data): + self.item_processor.ensure_data_valid(item) + + def postprocess(self, data: Dict[str, Any]) -> Dict[str, Any]: + processed = [self.item_processor.postprocess(item) for item in batch2list(data)] + procesed_data = list2batch(processed) + for k in data.keys(): + data[k] = procesed_data[k] + return data + + def preprocess(self, data: Dict[str, Any]) -> None: + processed = batch2list(data) + for item in processed: + self.item_processor.preprocess(item) + procesed_data = list2batch(processed) + for k in data.keys(): + data[k] = procesed_data[k] + + def filter_batch(self, batched_data: Dict[str, Any]) -> Dict[str, Any]: + processed = [] + for data in batch2list(batched_data): + rows, cols = data["image"][:2] + for data_name in self.item_processor.data_fields: + data[data_name] = self.item_processor.filter( + data[data_name], rows, cols + ) + processed.append(data) + return list2batch(processed) + + def filter(self, data: Sequence, rows: int, cols: int) -> Sequence: + return self.item_processor.filter(data, rows, cols) + + def check(self, data: Sequence, rows: int, cols: int) -> None: + return self.item_processor.check(data, rows, cols) + + def convert_to_albumentations( + self, data: Sequence, rows: int, cols: int + ) -> Sequence: + return 
self.item_processor.convert_to_albumentations(data, rows, cols) + + def convert_from_albumentations( + self, data: Sequence, rows: int, cols: int + ) -> Sequence: + return self.item_processor.convert_from_albumentations(data, rows, cols) + + +class KeypointsBatchProcessor(DataProcessor): + def __init__( + self, + params: KeypointParams, + additional_targets: Optional[Dict[str, str]] = None, + ): + super().__init__(params, additional_targets) + item_params = copy.deepcopy(params) + if item_params.label_fields is not None: + label_fields = item_params.label_fields + item_params.label_fields = [ + to_unbatched_name(field) for field in label_fields + ] + self.item_processor = KeypointsProcessor(item_params, additional_targets) + + @property + def default_data_name(self) -> str: + return "keypoints_batch" + + def ensure_data_valid(self, data: Dict[str, Any]) -> None: + for item in batch2list(data): + self.item_processor.ensure_data_valid(item) + + def postprocess(self, data: Dict[str, Any]) -> Dict[str, Any]: + processed = [self.item_processor.postprocess(item) for item in batch2list(data)] + procesed_data = list2batch(processed) + for k in data.keys(): + data[k] = procesed_data[k] + return data + + def preprocess(self, data: Dict[str, Any]) -> None: + processed = batch2list(data) + for item in processed: + self.item_processor.preprocess(item) + procesed_data = list2batch(processed) + for k in data.keys(): + data[k] = procesed_data[k] + + def filter_batch(self, batched_data: Dict[str, Any]) -> Dict[str, Any]: + processed = [] + for data in batch2list(batched_data): + rows, cols = data["image"][:2] + for data_name in self.item_processor.data_fields: + data[data_name] = self.item_processor.filter( + data[data_name], rows, cols + ) + processed.append(data) + return list2batch(processed) + + def filter(self, data: Sequence, rows: int, cols: int) -> Sequence: + return self.item_processor.filter(data, rows, cols) + + def check(self, data: Sequence, rows: int, cols: int) -> None: + return self.item_processor.check(data, rows, cols) + + def convert_to_albumentations( + self, data: Sequence, rows: int, cols: int + ) -> Sequence: + return self.item_processor.convert_to_albumentations(data, rows, cols) + + def convert_from_albumentations( + self, data: Sequence, rows: int, cols: int + ) -> Sequence: + return self.item_processor.convert_from_albumentations(data, rows, cols) diff --git a/luxonis_ml/data/utils/aug_utils/batch_transform.py b/luxonis_ml/data/utils/aug_utils/batch_transform.py new file mode 100644 index 00000000..5104d431 --- /dev/null +++ b/luxonis_ml/data/utils/aug_utils/batch_transform.py @@ -0,0 +1,56 @@ +import numpy as np +from typing import Dict, Any, Sequence, Callable, List +from albumentations.core.transforms_interface import ( + BoxType, + KeypointType, + BasicTransform, +) + + +class BatchBasedTransform(BasicTransform): + """Transform for multi-image.""" + + @property + def targets(self) -> Dict[str, Callable]: + return { + "image_batch": self.apply_to_image_batch, + "mask_batch": self.apply_to_mask_batch, + "bboxes_batch": self.apply_to_bboxes_batch, + "keypoints_batch": self.apply_to_keypoints_batch, + } + + def update_params(self, params: Dict[str, Any], **kwargs) -> Dict[str, Any]: + # This overwrites the `supre().update_params(...)` + return params + + def apply_to_image_batch( + self, image_batch: Sequence[BoxType], **params + ) -> List[np.ndarray]: + raise NotImplementedError( + "Method apply_to_image_batch is not implemented in class " + + self.__class__.__name__ + ) + + def 
apply_to_mask_batch(
+        self, mask_batch: Sequence[np.ndarray], **params
+    ) -> List[np.ndarray]:
+        raise NotImplementedError(
+            "Method apply_to_mask_batch is not implemented in class "
+            + self.__class__.__name__
+        )
+
+    def apply_to_bboxes_batch(
+        self, bboxes_batch: Sequence[BoxType], **params
+    ) -> List[BoxType]:
+        raise NotImplementedError(
+            "Method apply_to_bboxes_batch is not implemented in class "
+            + self.__class__.__name__
+        )
+
+    def apply_to_keypoints_batch(
+        self, keypoints_batch: Sequence[KeypointType], **params
+    ) -> List[KeypointType]:
+        raise NotImplementedError(
+            "Method apply_to_keypoints_batch is not implemented in class "
+            + self.__class__.__name__
+        )
diff --git a/luxonis_ml/data/utils/aug_utils/batch_utils.py b/luxonis_ml/data/utils/aug_utils/batch_utils.py
new file mode 100644
index 00000000..a1e738c7
--- /dev/null
+++ b/luxonis_ml/data/utils/aug_utils/batch_utils.py
@@ -0,0 +1,105 @@
+from typing import Dict, List, Any
+
+
+def batch2list(data: Dict[str, List]) -> List[Dict[str, Any]]:
+    """Convert from a batched target dict to a list of normal target dicts
+    ex:
+    {"image_batch": image_batch, "bboxes_batch": bboxes_batch, ...}
+    =>
+    [
+        {"image": image_batch[0], "bboxes": bboxes_batch[0], ...},
+        {"image": image_batch[1], "bboxes": bboxes_batch[1], ...},
+        ...
+    ]
+    """
+    if "image_batch" not in data:
+        raise ValueError("Batch-based transform should have `image_batch` target")
+    batch_size = len(data["image_batch"])
+    items = []
+    for i in range(batch_size):
+        item = {}
+        for k, v in data.items():
+            if k.endswith("_batch"):
+                # ex. {"image_batch": image_batch} -> {"image": image_batch[i]}
+                item_k = to_unbatched_name(k)
+                item[item_k] = v[i]
+            else:
+                raise ValueError(f"All keys must have the '_batch' suffix, got `{k}`")
+        items.append(item)
+    return items
+
+
+def list2batch(data: List[Dict[str, Any]]) -> Dict[str, List]:
+    """Convert from a list of normal target dicts to a batched target dict
+    ex:
+    [
+        {"image": image_batch[0], "bboxes": bboxes_batch[0], ...},
+        {"image": image_batch[1], "bboxes": bboxes_batch[1], ...},
+        ...
+    ]
+    =>
+    {"image_batch": image_batch, "bboxes_batch": bboxes_batch, ...}
+    """
+
+    if len(data) == 0:
+        raise ValueError("The input should have at least one item.")
+
+    item = data[0]
+    batch: Dict[str, Any] = {f"{k}_batch": [] for k in item.keys()}
+    for item in data:
+        for k, v in item.items():
+            batch_k = to_batched_name(k)
+            batch[batch_k].append(v)
+
+    return batch
+
+
+def to_unbatched_name(batched_name: str) -> str:
+    """Get a normal target name from a batched target name.
+    If the given name does not have the "_batch" suffix, a ValueError is raised.
+    ex. `abc_batch --> abc`
+    """
+    if not batched_name.endswith("_batch"):
+        raise ValueError(
+            f"Batched target name must have '_batch' suffix, got `{batched_name}`"
+        )
+    return batched_name.replace("_batch", "")
+
+
+def to_batched_name(name: str) -> str:
+    """Get a batched target name from a normal target name.
+    If the given name already has the "_batch" suffix, a ValueError is raised.
+    ex. `abc --> abc_batch`
+    """
+
+    if name.endswith("_batch"):
+        raise ValueError(
+            f"Non-batched target name must not have '_batch' suffix, got `{name}`"
+        )
+    return f"{name}_batch"
+
+
+def concat_batches(batches: List[Dict[str, List]]) -> Dict[str, List]:
+    """Concatenate batched targets
+    ex:
+    [
+        {"image_batch": image_batch1, "bboxes_batch": bboxes_batch1, ...}
+        {"image_batch": image_batch2, "bboxes_batch": bboxes_batch2, ...}
+    ]
+    =>
+    {
+        "image_batch": image_batch1 + image_batch2, "bboxes_batch": bboxes_batch1 + bboxes_batch2, ...
+    }
+    """
+
+    n_batches = len(batches)
+    if n_batches == 0:
+        raise ValueError("The input should have at least one item.")
+
+    keys = list(batches[0].keys())
+    out_batch: Dict[str, List] = {k: [] for k in keys}
+    for batch in batches:
+        for k in keys:
+            for item in batch[k]:
+                out_batch[k].append(item)
+    return out_batch
diff --git a/luxonis_ml/data/utils/aug_utils/mosaic4.py b/luxonis_ml/data/utils/aug_utils/mosaic4.py
new file mode 100644
index 00000000..395b69a9
--- /dev/null
+++ b/luxonis_ml/data/utils/aug_utils/mosaic4.py
@@ -0,0 +1,340 @@
+import numpy as np
+from typing import Tuple, Dict, Any, List, Optional
+from albumentations.core.transforms_interface import (
+    BoxInternalType,
+    ImageColorType,
+    KeypointInternalType,
+)
+from albumentations.core.bbox_utils import (
+    denormalize_bbox,
+    normalize_bbox,
+)
+from .batch_transform import BatchBasedTransform
+
+
+class Mosaic4(BatchBasedTransform):
+    """Mosaic augmentation arranges four randomly selected images into a single image in a 2x2 grid layout.
+    The input images should have the same number of channels but can have different widths and heights.
+    The output is cropped around the intersection point of the four images with the size (out_width x out_height).
+    If the mosaic image is smaller than out_width x out_height, the gap is filled by the fill_value.
+    Args:
+        out_height (int): output image height.
+            The mosaic image is cropped by this height around the mosaic center.
+            If the size of the mosaic image is smaller than this value the gap is filled by the `value`.
+        out_width (int): output image width.
+            The mosaic image is cropped by this width around the mosaic center.
+            If the size of the mosaic image is smaller than this value the gap is filled by the `value`.
+        value (int, float, list of ints, list of floats): padding value. Default 0 (None).
+        replace (bool): whether to allow replacement in sampling or not. When the value is `True`, the same image
+            can be selected multiple times. When False, the batch size of the input should be at least four.
+        out_batch_size (int): output batch size. If replace is False,
+            the input batch size should be 4 * out_batch_size.
+        mask_value (int, float, list of ints, list of floats): padding value for masks. Default 0 (None).
+    Targets:
+        image_batch, mask_batch, bboxes_batch, keypoints_batch
+        [Bochkovskiy] Bochkovskiy A, Wang CY, Liao HYM.
(2020) "YOLOv 4 : Optimal speed and accuracy of object detection.", + https://arxiv.org/pdf/2004.10934.pdf + """ + + def __init__( + self, + out_height, + out_width, + value=None, + replace=True, + out_batch_size=1, + mask_value=None, + always_apply=False, + p=0.5, + ): + super().__init__(always_apply=always_apply, p=p) + + if out_height <= 0: + raise ValueError(f"out_height should be larger than 0, got {out_height}") + if out_width <= 0: + raise ValueError(f"out_width should be larger than 0, got {out_width}") + if out_batch_size <= 0: + raise ValueError( + f"out_batch_size should be larger than 0, got {out_batch_size}" + ) + + self.n_tiles = 4 # 2x2 + self.out_height = out_height + self.out_width = out_width + self.replace = replace + self.value = value + self.mask_value = mask_value + self.out_batch_size = out_batch_size + + def get_transform_init_args_names(self) -> Tuple[str, ...]: + return ( + "out_height", + "out_width", + "replace", + "value", + "out_batch_size", + "mask_value", + ) + + @property + def targets_as_params(self): + return ["image_batch"] + + def apply_to_image_batch(self, image_batch, indices, **params): + output_batch = [] + for i_batch in range(self.out_batch_size): + idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)] + image_chunk = [image_batch[i] for i in idx_chunk] + mosaiced = mosaic4(image_chunk, self.out_height, self.out_width, self.value) + output_batch.append(mosaiced) + return output_batch + + def apply_to_mask_batch(self, mask_batch, indices, **params): + output_batch = [] + for i_batch in range(self.out_batch_size): + idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)] + mask_chunk = [mask_batch[i] for i in idx_chunk] + mosaiced = mosaic4( + mask_chunk, self.out_height, self.out_width, self.mask_value + ) + output_batch.append(mosaiced) + return output_batch + + def apply_to_bboxes_batch(self, bboxes_batch, indices, image_shapes, **params): + output_batch = [] + for i_batch in range(self.out_batch_size): + idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)] + bboxes_chunk = [bboxes_batch[i] for i in idx_chunk] + shape_chunk = [image_shapes[i] for i in idx_chunk] + new_bboxes = [] + for i in range(self.n_tiles): + bboxes = bboxes_chunk[i] + rows, cols = shape_chunk[i] + for bbox in bboxes: + new_bbox = bbox_mosaic4( + bbox[:4], rows, cols, i, self.out_height, self.out_width + ) + new_bboxes.append(tuple(new_bbox) + tuple(bbox[4:])) + output_batch.append(new_bboxes) + return output_batch + + def apply_to_keypoints_batch( + self, keyboints_batch, indices, image_shapes, **params + ): + output_batch = [] + for i_batch in range(self.out_batch_size): + idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)] + keypoints_chunk = [keyboints_batch[i] for i in idx_chunk] + shape_chunk = [image_shapes[i] for i in idx_chunk] + new_keypoints = [] + for i in range(self.n_tiles): + keypoints = keypoints_chunk[i] + rows, cols = shape_chunk[i] + for keypoint in keypoints: + new_keypoint = keypoint_mosaic4( + keypoint[:4], rows, cols, i, self.out_height, self.out_width + ) + new_keypoints.append(new_keypoint + tuple(keypoint[4:])) + output_batch.append(new_keypoints) + return output_batch + + def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]: + image_batch = params["image_batch"] + n = len(image_batch) + if not self.replace and self.n_tiles * self.out_batch_size > n: + raise ValueError( + f"If replace == False, the batch size (= {n}) should be larger 
than " + + f"{self.n_tiles} x out_batch_size (= {self.n_tiles * self.out_batch_size})" + ) + indices = np.random.choice( + range(n), size=self.n_tiles * self.out_batch_size, replace=self.replace + ).tolist() + image_shapes = [tuple(image.shape[:2]) for image in image_batch] + return { + "indices": indices, + "image_shapes": image_shapes, + } + + +def mosaic4( + image_batch: List[np.ndarray], + height: int, + width: int, + value: Optional[ImageColorType] = None, +) -> np.ndarray: + """Arrange the images in a 2x2 grid layout. + The input images should have the same number of channels but can have different widths and heights. + The output is cropped around the intersection point of the four images with the size (with x height). + If the mosaic image is smaller than with x height, the gap is filled by the fill_value. + This implementation is based on YOLOv5 with some modification: + https://github.com/ultralytics/yolov5/blob/932dc78496ca532a41780335468589ad7f0147f7/utils/datasets.py#L648 + Args: + image_batch (List[np.ndarray]): image list. The length should be four. Each image can has different size. + height (int): Height of output mosaic image + width (int): Width of output mosaic image + value (int, float, list of ints, list of float): padding value + """ + N_TILES = 4 + if len(image_batch) != N_TILES: + raise ValueError(f"Length of image_batch should be 4. Got {len(image_batch)}") + + for i in range(N_TILES - 1): + if image_batch[0].shape[2:] != image_batch[i + 1].shape[2:]: + raise ValueError( + "All images should have the same number of channels." + + f" Got the shapes {image_batch[0].shape} and {image_batch[i + 1].shape}" + ) + + if image_batch[0].dtype != image_batch[i + 1].dtype: + raise ValueError( + "All images should have the same dtype." + + f" Got the dtypes {image_batch[0].dtype} and {image_batch[i + 1].dtype}" + ) + + if len(image_batch[0].shape) == 2: + out_shape = [height, width] + else: + out_shape = [height, width, image_batch[0].shape[2]] + + dtype = image_batch[0].dtype + img4 = np.zeros(out_shape, dtype=dtype) # base image with 4 tiles + + value = 0 if value is None else value + if isinstance(value, (tuple, list, np.ndarray)): + if out_shape[2] != len(value): + ValueError( + "value parameter should has the same lengh as the output channel." 
+ + f" value: ({value}), output shape: {out_shape}" + ) + for i in range(len(value)): + img4[:, :, i] = value[i] + else: + img4[:] = value + + center_x = width // 2 + center_y = height // 2 + for i, img in enumerate(image_batch): + (h, w) = img.shape[:2] + + # place img in img4 + # this based on the yolo5's implementation + # + if i == 0: # top left + x1a, y1a, x2a, y2a = ( + max(center_x - w, 0), + max(center_y - h, 0), + center_x, + center_y, + ) # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = ( + w - (x2a - x1a), + h - (y2a - y1a), + w, + h, + ) # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = ( + center_x, + max(center_y - h, 0), + min(center_x + w, width), + center_y, + ) + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = ( + max(center_x - w, 0), + center_y, + center_x, + min(height, center_y + h), + ) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = ( + center_x, + center_y, + min(center_x + w, width), + min(height, center_y + h), + ) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + + return img4 + + +def bbox_mosaic4( + bbox: BoxInternalType, + rows: int, + cols: int, + position_index: int, + height: int, + width: int, +) -> BoxInternalType: + """Put the given bbox in one of the cells of the 2x2 grid. + Args: + bbox (tuple): A bounding box `(x_min, y_min, x_max, y_max)`. + rows (int): Height of input image that corresponds to one of the mosaic cells + cols (int): Width of input image that corresponds to one of the mosaic cells + position_index (int): Index of the mosaic cell. 0: top left, 1: top right, 2: bottom left, 3: bottom right + height (int): Height of output mosaic image + width (int): Width of output mosaic image + """ + bbox = denormalize_bbox(bbox, rows, cols) + center_x = width // 2 + center_y = height // 2 + if position_index == 0: # top left + shift_x = center_x - cols + shift_y = center_y - rows + elif position_index == 1: # top right + shift_x = center_x + shift_y = center_y - rows + elif position_index == 2: # bottom left + shift_x = center_x - cols + shift_y = center_y + elif position_index == 3: # bottom right + shift_x = center_x + shift_y = center_y + bbox = ( + bbox[0] + shift_x, + bbox[1] + shift_y, + bbox[2] + shift_x, + bbox[3] + shift_y, + ) + bbox = normalize_bbox(bbox, height, width) + return bbox + + +def keypoint_mosaic4( + keypoint: KeypointInternalType, + rows: int, + cols: int, + position_index: int, + height: int, + width: int, +) -> KeypointInternalType: + """Put the given bbox in one of the cells of the 2x2 grid. + Args: + keypoint (tuple): A keypoint `(x, y, angle, scale)`. + rows (int): Height of input image that corresponds to one of the mosaic cells + cols (int): Width of input image that corresponds to one of the mosaic cells + position_index (int): Index of the mosaic cell. 
0: top left, 1: top right, 2: bottom left, 3: bottom right + height (int): Height of output mosaic image + width (int): Width of output mosaic image + """ + x, y, angle, scale = keypoint + + center_x = width // 2 + center_y = height // 2 + if position_index == 0: # top left + shift_x = center_x - cols + shift_y = center_y - rows + elif position_index == 1: # top right + shift_x = center_x + shift_y = center_y - rows + elif position_index == 2: # bottom left + shift_x = center_x - cols + shift_y = center_y + elif position_index == 3: # bottom right + shift_x = center_x + shift_y = center_y + return x + shift_x, y + shift_y, angle, scale From 4f57493c64828babb12cf14d43bb145e161fa20e Mon Sep 17 00:00:00 2001 From: klemen1999 Date: Sun, 7 Jan 2024 20:19:21 +0100 Subject: [PATCH 02/10] remove dependency to custom albumentations fork --- luxonis_ml/data/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_ml/data/requirements.txt b/luxonis_ml/data/requirements.txt index 2a6b94b4..5784644e 100644 --- a/luxonis_ml/data/requirements.txt +++ b/luxonis_ml/data/requirements.txt @@ -6,7 +6,7 @@ PyYAML>=6.0 label-studio-sdk>=0.0.28 # boto3>=1.17.39 # google-cloud-storage>=2.10.0 -albumentations @ git+https://github.com/i-aki-y/albumentations.git@c137e2b # albumentations fork with batched transform support +albumentations==1.3.1 pandas>=2.0.0 pyarrow>=13.0.0 pycocotools>=2.0.7 From aae0fc36cda368701263acf922dbb0594ecd058d Mon Sep 17 00:00:00 2001 From: klemen1999 Date: Sun, 7 Jan 2024 20:36:28 +0100 Subject: [PATCH 03/10] fix for letterbox resize box/kpts clipping --- luxonis_ml/data/augmentations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/luxonis_ml/data/augmentations.py b/luxonis_ml/data/augmentations.py index d53164e6..59a597aa 100644 --- a/luxonis_ml/data/augmentations.py +++ b/luxonis_ml/data/augmentations.py @@ -745,7 +745,7 @@ def apply_to_bbox( # clip bbox to image, ignoring padding bbox = bbox.clip( min=[pad_left, pad_top] * 2, - max=[params["cols"] + pad_left, params["rows"] + pad_top] * 2, + max=[self.width - pad_left, self.height - pad_top] * 2, ).tolist() return normalize_bbox(bbox, self.height, self.width) @@ -784,10 +784,10 @@ def apply_to_keypoint( # if keypoint is in the padding then set coordinates to -1 out_keypoint = ( new_x - if not self._out_of_bounds(new_x, pad_left, params["cols"] + pad_left) + if not self._out_of_bounds(new_x, pad_left, self.width - pad_left) else -1, new_y - if not self._out_of_bounds(new_y, pad_top, params["rows"] + pad_top) + if not self._out_of_bounds(new_y, pad_top, self.height - pad_top) else -1, angle, scale * max(scale_x, scale_y), From bcd1ffb90b87f318d97e49b5c3c5e7604b76bdbc Mon Sep 17 00:00:00 2001 From: klemen1999 Date: Mon, 8 Jan 2024 14:15:19 +0100 Subject: [PATCH 04/10] moved files, epytext docstrings --- luxonis_ml/data/augmentations/__init__.py | 13 + .../batch_compose.py | 91 +-- .../batch_processors.py | 8 +- .../batch_transform.py | 19 +- .../batch_utils.py | 12 +- .../data/augmentations/letterbox_resize.py | 289 +++++++++ luxonis_ml/data/augmentations/mixup.py | 170 ++++++ .../mosaic4.py => augmentations/mosaic.py} | 250 +++++--- .../utils.py} | 550 +----------------- luxonis_ml/data/utils/aug_utils/__init__.py | 5 - 10 files changed, 753 insertions(+), 654 deletions(-) create mode 100644 luxonis_ml/data/augmentations/__init__.py rename luxonis_ml/data/{utils/aug_utils => augmentations}/batch_compose.py (82%) rename luxonis_ml/data/{utils/aug_utils => 
augmentations}/batch_processors.py (97%) rename luxonis_ml/data/{utils/aug_utils => augmentations}/batch_transform.py (80%) rename luxonis_ml/data/{utils/aug_utils => augmentations}/batch_utils.py (90%) create mode 100644 luxonis_ml/data/augmentations/letterbox_resize.py create mode 100644 luxonis_ml/data/augmentations/mixup.py rename luxonis_ml/data/{utils/aug_utils/mosaic4.py => augmentations/mosaic.py} (56%) rename luxonis_ml/data/{augmentations.py => augmentations/utils.py} (50%) delete mode 100644 luxonis_ml/data/utils/aug_utils/__init__.py diff --git a/luxonis_ml/data/augmentations/__init__.py b/luxonis_ml/data/augmentations/__init__.py new file mode 100644 index 00000000..18408eab --- /dev/null +++ b/luxonis_ml/data/augmentations/__init__.py @@ -0,0 +1,13 @@ +from .letterbox_resize import LetterboxResize +from .mixup import MixUp +from .mosaic import Mosaic4 +from .utils import Augmentations, TrainAugmentations, ValAugmentations + +__all__ = [ + "Augmentations", + "TrainAugmentations", + "ValAugmentations", + "LetterboxResize", + "MixUp", + "Mosaic4", +] diff --git a/luxonis_ml/data/utils/aug_utils/batch_compose.py b/luxonis_ml/data/augmentations/batch_compose.py similarity index 82% rename from luxonis_ml/data/utils/aug_utils/batch_compose.py rename to luxonis_ml/data/augmentations/batch_compose.py index 454934ef..78473ae0 100644 --- a/luxonis_ml/data/utils/aug_utils/batch_compose.py +++ b/luxonis_ml/data/augmentations/batch_compose.py @@ -1,40 +1,49 @@ import random +from typing import Any, Dict, List, Optional, Union, cast + import numpy as np -from typing import Optional, Union, Dict, Any, List, cast +from albumentations.core.bbox_utils import ( + BboxParams, + BboxProcessor, + DataProcessor, +) from albumentations.core.composition import ( BaseCompose, TransformsSeqType, get_always_apply, ) -from albumentations.core.bbox_utils import ( - DataProcessor, - BboxProcessor, - BboxParams, -) from albumentations.core.keypoints_utils import KeypointParams, KeypointsProcessor from albumentations.core.utils import get_shape + from .batch_processors import BboxBatchProcessor, KeypointsBatchProcessor -from .batch_utils import * +from .batch_utils import batch2list, concat_batches, list2batch, to_unbatched_name class Compose(BaseCompose): - """Compose transforms and handle all transformations regarding bounding boxes - - Args: - transforms (list): list of transformations to compose. - bbox_params (BboxParams): Parameters for bounding boxes transforms - keypoint_params (KeypointParams): Parameters for keypoints transforms - additional_targets (dict): Dict with keys - new target name, values - old target name. ex: {'image2': 'image'} - p (float): probability of applying all list of transforms. Default: 1.0. - is_check_shapes (bool): If True shapes consistency of images/mask/masks would be checked on each call. If you - would like to disable this check - pass False (do it only if you are sure in your data consistency). + """Compose transforms and handle all transformations regarding bounding boxes. + + @param transforms: List of transformations to compose + @type transforms: TransformsSeqType + @param bboxparams: Parameters for bounding boxes transforms. Defaults to None. + @type bboxparams: Optional[Union[dict, BboxParams]] + @param keypoint_params: Parameters for keypoint transforms. Defaults to None. + @type keypoint_params: Optional[Union[dict, KeypointParams]] + @param additional_targets: Dict with keys - new target name, values - old target + name. ex: {'image2': 'image'}. 
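As the `additional_targets` example above suggests, extra inputs replay the same spatial transform; a quick sketch (standard Albumentations semantics, assumed unchanged in this vendored copy):

    import albumentations as A
    import numpy as np

    img_a = np.arange(12, dtype=np.uint8).reshape(2, 2, 3)
    img_b = img_a[::-1].copy()

    aug = Compose([A.HorizontalFlip(p=1.0)], additional_targets={"image2": "image"})
    out = aug(image=img_a, image2=img_b)
    assert np.array_equal(out["image2"], img_b[:, ::-1])  # both flipped identically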
Defaults to None. + @type additional_targets: Optional[Dict[str, str]] + @param p: Probability of applying all list of transforms. Defaults to 1.0. + @type p: float + @param is_check_shapes: If True shapes consistency of images/mask/masks would be checked on + each call. If you would like to disable this check - pass False (do it only if you are sure + in your data consistency). Defaults to True. + @type is_check_shapes: bool """ def __init__( self, transforms: TransformsSeqType, - bbox_params: Optional[Union[dict, "BboxParams"]] = None, - keypoint_params: Optional[Union[dict, "KeypointParams"]] = None, + bbox_params: Optional[Union[dict, BboxParams]] = None, + keypoint_params: Optional[Union[dict, KeypointParams]] = None, additional_targets: Optional[Dict[str, str]] = None, p: float = 1.0, is_check_shapes: bool = True, @@ -232,26 +241,34 @@ def _make_targets_contiguous(self, data: Dict[str, Any]) -> Dict[str, Any]: class BatchCompose(Compose): - """Compose designed to handle the multi-image transforms - The contents can be a subclass of `BatchBasedTransform` or - other transforms enclosed by ForEach container. - All targets' names should have the suffix "_batch", ex ("image_batch", "bboxes_batch"). - Note this nameing rule is applied to the `label_fields` of the `BboxParams` and the `KeypointsParams`. - Args: - transforms (list): list of transformations to compose. - bbox_params (BboxParams): Parameters for bounding boxes transforms - keypoint_params (KeypointParams): Parameters for keypoints transforms - additional_targets (dict): Dict with keys - new target name, values - old target name. ex: {'image2': 'image'} - p (float): probability of applying all list of transforms. Default: 1.0. - is_check_shapes (bool): If True shapes consistency of images/mask/masks would be checked on each call. If you - would like to disable this check - pass False (do it only if you are sure in your data consistency). + """Compose designed to handle the multi-image transforms The contents can be a + subclass of `BatchBasedTransform` or other transforms enclosed by ForEach container. + All targets' names should have the suffix "_batch", ex ("image_batch", + "bboxes_batch"). Note this nameing rule is applied to the `label_fields` of the + `BboxParams` and the `KeypointsParams`. + + @param transforms: List of transformations to compose + @type transforms: TransformsSeqType + @param bboxparams: Parameters for bounding boxes transforms. Defaults to None. + @type bboxparams: Optional[Union[dict, BboxParams]] + @param keypoint_params: Parameters for keypoint transforms. Defaults to None. + @type keypoint_params: Optional[Union[dict, KeypointParams]] + @param additional_targets: Dict with keys - new target name, values - old target + name. ex: {'image2': 'image'}. Defaults to None. + @type additional_targets: Optional[Dict[str, str]] + @param p: Probability of applying all list of transforms. Defaults to 1.0. + @type p: float + @param is_check_shapes: If True shapes consistency of images/mask/masks would be checked on + each call. If you would like to disable this check - pass False (do it only if you are sure + in your data consistency). Defaults to True. 
+ @type is_check_shapes: bool """ def __init__( self, transforms: TransformsSeqType, - bbox_params: Optional[Union[dict, "BboxParams"]] = None, - keypoint_params: Optional[Union[dict, "KeypointParams"]] = None, + bbox_params: Optional[Union[dict, BboxParams]] = None, + keypoint_params: Optional[Union[dict, KeypointParams]] = None, additional_targets: Optional[Dict[str, str]] = None, p: float = 1.0, is_check_shapes: bool = True, @@ -308,8 +325,9 @@ def _make_targets_contiguous(self, batched_data: Dict[str, Any]) -> Dict[str, An class ForEach(BaseCompose): - """Apply transforms for each batch element - This expects batched input and can be contained by the `BatchCompose`. + """Apply transforms for each batch element This expects batched input and can be + contained by the `BatchCompose`. + This can contains any other transforms but a subclass of the `BatchBasedTransform`. Internally, this container works as the following way: ``` @@ -349,6 +367,7 @@ def add_targets(self, additional_targets: Optional[Dict[str, str]]) -> None: class Repeat(BaseCompose): """Apply transforms repeatedly and concatenates the output batches. + This expects batched input and can be contained by the `BatchCompose`. The contained transforms should be a subbclass of the `BatchBasedTransform`. Internally, this container works as the following way: diff --git a/luxonis_ml/data/utils/aug_utils/batch_processors.py b/luxonis_ml/data/augmentations/batch_processors.py similarity index 97% rename from luxonis_ml/data/utils/aug_utils/batch_processors.py rename to luxonis_ml/data/augmentations/batch_processors.py index 359eef93..6bf33b4c 100644 --- a/luxonis_ml/data/utils/aug_utils/batch_processors.py +++ b/luxonis_ml/data/augmentations/batch_processors.py @@ -1,9 +1,11 @@ import copy -from typing import Optional, Dict, Any, Sequence -from albumentations.core.utils import DataProcessor +from typing import Any, Dict, Optional, Sequence + from albumentations.core.bbox_utils import BboxParams, BboxProcessor from albumentations.core.keypoints_utils import KeypointParams, KeypointsProcessor -from .batch_utils import * +from albumentations.core.utils import DataProcessor + +from .batch_utils import batch2list, list2batch, to_unbatched_name class BboxBatchProcessor(DataProcessor): diff --git a/luxonis_ml/data/utils/aug_utils/batch_transform.py b/luxonis_ml/data/augmentations/batch_transform.py similarity index 80% rename from luxonis_ml/data/utils/aug_utils/batch_transform.py rename to luxonis_ml/data/augmentations/batch_transform.py index 5104d431..70e7510c 100644 --- a/luxonis_ml/data/utils/aug_utils/batch_transform.py +++ b/luxonis_ml/data/augmentations/batch_transform.py @@ -1,14 +1,27 @@ +from typing import Any, Callable, Dict, List, Sequence + import numpy as np -from typing import Dict, Any, Sequence, Callable, List from albumentations.core.transforms_interface import ( + BasicTransform, BoxType, KeypointType, - BasicTransform, ) class BatchBasedTransform(BasicTransform): - """Transform for multi-image.""" + """.""" + + def __init__(self, batch_size: int, **kwargs): + """Transform for multi-image. 
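Concretely, a subclass only needs to provide the per-target batch hooks; a minimal pass-through example under these assumptions (for illustration, not part of the patch):

    from typing import List

    import numpy as np


    class IdentityBatch(BatchBasedTransform):
        """Pass-through batch transform, illustration only."""

        def __init__(self, always_apply: bool = False, p: float = 1.0):
            super().__init__(batch_size=1, always_apply=always_apply, p=p)

        def apply_to_image_batch(self, image_batch, **params) -> List[np.ndarray]:
            return list(image_batch)

        def apply_to_bboxes_batch(self, bboxes_batch, **params) -> list:
            return list(bboxes_batch)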
+ + @param batch_size: Batch size needed for augmentation to work + @type batch_size: int + @param kwargs: Additional BasicTransform parameters + @type kwargs: Any + """ + super().__init__(**kwargs) + + self.batch_size = batch_size @property def targets(self) -> Dict[str, Callable]: diff --git a/luxonis_ml/data/utils/aug_utils/batch_utils.py b/luxonis_ml/data/augmentations/batch_utils.py similarity index 90% rename from luxonis_ml/data/utils/aug_utils/batch_utils.py rename to luxonis_ml/data/augmentations/batch_utils.py index a1e738c7..1206bdf8 100644 --- a/luxonis_ml/data/utils/aug_utils/batch_utils.py +++ b/luxonis_ml/data/augmentations/batch_utils.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Any +from typing import Any, Dict, List def batch2list(data: Dict[str, List]) -> List[Dict[str, Any]]: @@ -55,8 +55,9 @@ def list2batch(data: List[Dict[str, Any]]) -> Dict[str, List]: def to_unbatched_name(batched_name: str) -> str: - """Get a normal target name from a batched target name - If the given name does not have "_batched" suffix, ValueError will be raised. + """Get a normal target name from a batched target name If the given name does not + have "_batched" suffix, ValueError will be raised. + ex. `abc --> abc_batched` """ if not batched_name.endswith("_batch"): @@ -67,8 +68,9 @@ def to_unbatched_name(batched_name: str) -> str: def to_batched_name(name: str) -> str: - """Get a unbatched target name from a normal target name - If the given name already has had "_batched" suffix, ValueError will be raised. + """Get a unbatched target name from a normal target name If the given name already + has had "_batched" suffix, ValueError will be raised. + ex. `abc_batched --> abc ` """ diff --git a/luxonis_ml/data/augmentations/letterbox_resize.py b/luxonis_ml/data/augmentations/letterbox_resize.py new file mode 100644 index 00000000..1dbd2b37 --- /dev/null +++ b/luxonis_ml/data/augmentations/letterbox_resize.py @@ -0,0 +1,289 @@ +from typing import Any, Dict, Tuple + +import cv2 +import numpy as np +from albumentations import BoxType, DualTransform, KeypointType +from albumentations.core.bbox_utils import denormalize_bbox, normalize_bbox + +from .utils import AUGMENTATIONS + + +@AUGMENTATIONS.register_module() +class LetterboxResize(DualTransform): + def __init__( + self, + height: int, + width: int, + interpolation: int = cv2.INTER_LINEAR, + border_value: int = 0, + mask_value: int = 0, + always_apply: bool = False, + p: float = 1.0, + ): + """Augmentation to apply letterbox resizing to images. Also transforms masks, + bboxes and keypoints to correct shape. + + @param height: Desired height of the output. + @type height: int + @param width: Desired width of the output. + @type width: int + @param interpolation: Cv2 flag to specify interpolation used when resizing. + Defaults to cv2.INTER_LINEAR. + @type interpolation: int, optional + @param border_value: Padding value for images. Defaults to 0. + @type border_value: int, optional + @param mask_value: Padding value for masks. Defaults to 0. + @type mask_value: int, optional + @param always_apply: Whether to always apply the transform. Defaults to False. + @type always_apply: bool, optional + @param p: Probability of applying the transform. Defaults to 1.0. 
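A quick sanity check of the resize (input size chosen arbitrarily):

    import numpy as np

    resize = LetterboxResize(height=416, width=416)
    out = resize(image=np.zeros((480, 640, 3), np.uint8))
    assert out["image"].shape == (416, 416, 3)  # 312x416 content, 52 px bands top and bottom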
+ @type p: float, optional + """ + + super().__init__(always_apply, p) + + if not (0 <= border_value <= 255): + raise ValueError("Border value must be in range [0,255].") + + if not (0 <= mask_value <= 255): + raise ValueError("Mask value must be in range [0,255].") + + self.height = height + self.width = width + self.interpolation = interpolation + self.border_value = border_value + self.mask_value = mask_value + + def update_params(self, params: Dict[str, Any], **kwargs) -> Dict[str, Any]: + """Updates augmentation parameters with the necessary metadata. + + @param params: The existing augmentation parameters dictionary. + @type params: Dict[str, Any] + @param kwargs: Additional keyword arguments to add the parameters. + @type kwargs: Any + @return: Updated dictionary containing the merged parameters. + @rtype: Dict[str, Any] + """ + + params = super().update_params(params, **kwargs) + + img_height = params["rows"] + img_width = params["cols"] + + ratio = min(self.height / img_height, self.width / img_width) + new_height = int(img_height * ratio) + new_width = int(img_width * ratio) + + # only supports center alignment + pad_top = (self.height - new_height) // 2 + pad_bottom = pad_top + + pad_left = (self.width - new_width) // 2 + pad_right = pad_left + + params.update( + { + "pad_top": pad_top, + "pad_bottom": pad_bottom, + "pad_left": pad_left, + "pad_right": pad_right, + } + ) + + return params + + def apply( + self, + img: np.ndarray, + pad_top: int, + pad_bottom: int, + pad_left: int, + pad_right: int, + **kwargs, + ) -> np.ndarray: + """Applies the letterbox augmentation to an image. + + @param img: Input image to which resize is applied. + @type img: np.ndarray + @param pad_top: Number of pixels to pad at the top. + @type pad_top: int + @param pad_bottom: Number of pixels to pad at the bottom. + @type pad_bottom: int + @param pad_left: Number of pixels to pad on the left. + @type pad_left: int + @param pad_right: Number of pixels to pad on the right. + @type pad_right: int + @param params: Additional parameters for the padding operation. + @type params: Any + @return: Image with applied letterbox resize. + @rtype: np.ndarray + """ + + resized_img = cv2.resize( + img, + (self.width - pad_left - pad_right, self.height - pad_top - pad_bottom), + interpolation=self.interpolation, + ) + img_out = cv2.copyMakeBorder( + resized_img, + pad_top, + pad_bottom, + pad_left, + pad_right, + cv2.BORDER_CONSTANT, + self.border_value, + ) + img_out = img_out.astype(img.dtype) + return img_out + + def apply_to_mask( + self, + img: np.ndarray, + pad_top: int, + pad_bottom: int, + pad_left: int, + pad_right: int, + **params, + ) -> np.ndarray: + """Applies letterbox augmentation to the input mask. + + @param img: Input mask to which resize is applied. + @type img: np.ndarray + @param pad_top: Number of pixels to pad at the top. + @type pad_top: int + @param pad_bottom: Number of pixels to pad at the bottom. + @type pad_bottom: int + @param pad_left: Number of pixels to pad on the left. + @type pad_left: int + @param pad_right: Number of pixels to pad on the right. + @type pad_right: int + @param params: Additional parameters for the padding operation. + @type params: Any + @return: Mask with applied letterbox resize. 
+        @rtype: np.ndarray
+        """
+
+        resized_img = cv2.resize(
+            img,
+            (self.width - pad_left - pad_right, self.height - pad_top - pad_bottom),
+            interpolation=cv2.INTER_NEAREST,
+        )
+        img_out = cv2.copyMakeBorder(
+            resized_img,
+            pad_top,
+            pad_bottom,
+            pad_left,
+            pad_right,
+            cv2.BORDER_CONSTANT,
+            self.mask_value,
+        )
+        img_out = img_out.astype(img.dtype)
+        return img_out
+
+    def apply_to_bbox(
+        self,
+        bbox: BoxType,
+        pad_top: int,
+        pad_bottom: int,
+        pad_left: int,
+        pad_right: int,
+        **params,
+    ) -> BoxType:
+        """Applies letterbox augmentation to the bounding box.
+
+        @param bbox: Bounding box to which resize is applied.
+        @type bbox: BoxType
+        @param pad_top: Number of pixels to pad at the top.
+        @type pad_top: int
+        @param pad_bottom: Number of pixels to pad at the bottom.
+        @type pad_bottom: int
+        @param pad_left: Number of pixels to pad on the left.
+        @type pad_left: int
+        @param pad_right: Number of pixels to pad on the right.
+        @type pad_right: int
+        @param params: Additional parameters for the padding operation.
+        @type params: Any
+        @return: Bounding box with applied letterbox resize.
+        @rtype: BoxType
+        """
+
+        x_min, y_min, x_max, y_max = denormalize_bbox(
+            bbox, self.height - pad_top - pad_bottom, self.width - pad_left - pad_right
+        )[:4]
+        bbox = np.array(
+            [x_min + pad_left, y_min + pad_top, x_max + pad_left, y_max + pad_top]
+        )
+        # clip bbox to image, ignoring padding
+        bbox = bbox.clip(
+            min=[pad_left, pad_top] * 2,
+            max=[self.width - pad_left, self.height - pad_top] * 2,
+        ).tolist()
+        return normalize_bbox(bbox, self.height, self.width)
+
+    def apply_to_keypoint(
+        self,
+        keypoint: KeypointType,
+        pad_top: int,
+        pad_bottom: int,
+        pad_left: int,
+        pad_right: int,
+        **params,
+    ) -> KeypointType:
+        """Applies letterbox augmentation to the keypoint.
+
+        @param keypoint: Keypoint to which resize is applied.
+        @type keypoint: KeypointType
+        @param pad_top: Number of pixels to pad at the top.
+        @type pad_top: int
+        @param pad_bottom: Number of pixels to pad at the bottom.
+        @type pad_bottom: int
+        @param pad_left: Number of pixels to pad on the left.
+        @type pad_left: int
+        @param pad_right: Number of pixels to pad on the right.
+        @type pad_right: int
+        @param params: Additional parameters for the padding operation.
+        @type params: Any
+        @return: Keypoint with applied letterbox resize.
+        @rtype: KeypointType
+        """
+
+        x, y, angle, scale = keypoint[:4]
+        scale_x = (self.width - pad_left - pad_right) / params["cols"]
+        scale_y = (self.height - pad_top - pad_bottom) / params["rows"]
+        new_x = (x * scale_x) + pad_left
+        new_y = (y * scale_y) + pad_top
+        # if keypoint is in the padding then set coordinates to -1
+        out_keypoint = (
+            new_x
+            if not self._out_of_bounds(new_x, pad_left, self.width - pad_left)
+            else -1,
+            new_y
+            if not self._out_of_bounds(new_y, pad_top, self.height - pad_top)
+            else -1,
+            angle,
+            scale * max(scale_x, scale_y),
+        )
+        return out_keypoint
+
+    def get_transform_init_args_names(self) -> Tuple[str, ...]:
+        """Gets the default arguments for the letterbox augmentation.
+
+        @return: The string keywords of the arguments.
+        @rtype: Tuple[str, ...]
+        """
+
+        return ("height", "width", "interpolation", "border_value", "mask_value")
+
+    def _out_of_bounds(self, value: float, min_limit: float, max_limit: float) -> bool:
+        """Check if the given value is outside the specified limits.
+
+        @param value: The value to be checked.
+        @type value: float
+        @param min_limit: Minimum limit.
+        @type min_limit: float
+        @param max_limit: Maximum limit.
+ @type max_limit: float + @return: True if the value is outside the specified limits, False otherwise. + @rtype: bool + """ + return value < min_limit or value > max_limit diff --git a/luxonis_ml/data/augmentations/mixup.py b/luxonis_ml/data/augmentations/mixup.py new file mode 100644 index 00000000..204d4fe8 --- /dev/null +++ b/luxonis_ml/data/augmentations/mixup.py @@ -0,0 +1,170 @@ +import random +from typing import Any, Dict, List, Tuple, Union + +import albumentations as A +import cv2 +import numpy as np +from albumentations import BoxType, KeypointType + +from .batch_transform import BatchBasedTransform +from .utils import AUGMENTATIONS + + +@AUGMENTATIONS.register_module() +class MixUp(BatchBasedTransform): + def __init__( + self, + alpha: Union[float, Tuple[float, float]] = 0.5, + out_batch_size: int = 1, + always_apply: bool = False, + p: float = 0.5, + ): + """MixUp augmentation that merges two images and their annotations into one. If + images are not of same size then second one is first resized to match the first + one. + + @param alpha: Mixing coefficient, either a single float or a tuple representing + the range. Defaults to 0.5. + @type alpha: Union[float, Tuple[float, float]], optional + @param out_batch_size: Number of output images in the batch. Defaults to 1. + @type out_batch_size: int, optional + @param always_apply: Whether to always apply the transform. Defaults to False. + @type always_apply: bool, optional + @param p: Probability of applying the transform. Defaults to 0.5. + @type p: float, optional + """ + super().__init__(batch_size=2, always_apply=always_apply, p=p) + + self.alpha = alpha + self.out_batch_size = out_batch_size + + def get_transform_init_args_names(self) -> Tuple[str, ...]: + """Gets the default arguments for the mixup augmentation. + + @return: The string keywords of the arguments. + @rtype: Tuple[str, ...] + """ + return ("alpha", "out_batch_size") + + @property + def targets_as_params(self) -> List[str]: + """List of augmentation targets. + + @return: Output list of augmentation targets. + @rtype: List[str] + """ + return ["image_batch"] + + def apply_to_image_batch( + self, + image_batch: List[np.ndarray], + image_shapes: List[Tuple[int, int]], + **params, + ) -> List[np.ndarray]: + """Applies the transformation to a batch of images. + + @param image_batch: Batch of input images to which the transformation is + applied. + @type image_batch: List[np.ndarray] + @param image_shapes: Shapes of the input images in the batch. + @type image_shapes: List[Tuple[int, int]] + @param params: Additional parameters for the transformation. + @type params: Any + @return: List of transformed images. + @rtype: List[np.ndarray] + """ + image1 = image_batch[0] + # resize second image to size of the first one + image2 = cv2.resize(image_batch[1], (image_shapes[0][1], image_shapes[0][0])) + + if isinstance(self.alpha, float): + curr_alpha = np.clip(self.alpha, 0, 1) + else: + curr_alpha = random.uniform(max(self.alpha[0], 0), min(self.alpha[1], 1)) + img_out = cv2.addWeighted(image1, curr_alpha, image2, 1 - curr_alpha, 0.0) + return [img_out] + + def apply_to_mask_batch( + self, + mask_batch: List[np.ndarray], + image_shapes: List[Tuple[int, int]], + **params, + ) -> List[np.ndarray]: + """Applies the transformation to a batch of masks. + + @param image_batch: Batch of input masks to which the transformation is applied. + @type image_batch: List[np.ndarray] + @param image_shapes: Shapes of the input images in the batch. 
+        @type image_shapes: List[Tuple[int, int]]
+        @param params: Additional parameters for the transformation.
+        @type params: Any
+        @return: List of transformed masks.
+        @rtype: List[np.ndarray]
+        """
+        mask1 = mask_batch[0]
+        mask2 = cv2.resize(
+            mask_batch[1],
+            (image_shapes[0][1], image_shapes[0][0]),
+            interpolation=cv2.INTER_NEAREST,
+        )
+        out_mask = mask1 + mask2
+        # if masks intersect keep one present in first image
+        mask_inter = mask1 > 0
+        out_mask[mask_inter] = mask1[mask_inter]
+        return [out_mask]
+
+    def apply_to_bboxes_batch(
+        self, bboxes_batch: List[BoxType], image_shapes: List[Tuple[int, int]], **params
+    ) -> List[BoxType]:
+        """Applies the transformation to a batch of bboxes.
+
+        @param bboxes_batch: Batch of input bboxes to which the transformation is
+            applied.
+        @type bboxes_batch: List[BoxType]
+        @param image_shapes: Shapes of the input images in the batch.
+        @type image_shapes: List[Tuple[int, int]]
+        @param params: Additional parameters for the transformation.
+        @type params: Any
+        @return: List of transformed bboxes.
+        @rtype: List[BoxType]
+        """
+        return [bboxes_batch[0] + bboxes_batch[1]]
+
+    def apply_to_keypoints_batch(
+        self,
+        keypoints_batch: List[KeypointType],
+        image_shapes: List[Tuple[int, int]],
+        **params,
+    ) -> List[KeypointType]:
+        """Applies the transformation to a batch of keypoints.
+
+        @param keypoints_batch: Batch of input keypoints to which the transformation is
+            applied.
+        @type keypoints_batch: List[KeypointType]
+        @param image_shapes: Shapes of the input images in the batch.
+        @type image_shapes: List[Tuple[int, int]]
+        @param params: Additional parameters for the transformation.
+        @type params: Any
+        @return: List of transformed keypoints.
+        @rtype: List[KeypointType]
+        """
+        scaled_kpts2 = []
+        scale_x = image_shapes[0][1] / image_shapes[1][1]
+        scale_y = image_shapes[0][0] / image_shapes[1][0]
+        for kpt in keypoints_batch[1]:
+            new_kpt = A.augmentations.geometric.functional.keypoint_scale(
+                keypoint=kpt, scale_x=scale_x, scale_y=scale_y
+            )
+            scaled_kpts2.append(new_kpt + kpt[4:])
+        return [keypoints_batch[0] + scaled_kpts2]
+
+    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        """Get parameters dependent on the targets.
+
+        @param params: Dictionary containing parameters.
+        @type params: Dict[str, Any]
+        @return: Dictionary containing parameters dependent on the targets.
+        @rtype: Dict[str, Any]
+        """
+        image_batch = params["image_batch"]
+        return {"image_shapes": [image.shape[:2] for image in image_batch]}
diff --git a/luxonis_ml/data/utils/aug_utils/mosaic4.py b/luxonis_ml/data/augmentations/mosaic.py
similarity index 56%
rename from luxonis_ml/data/utils/aug_utils/mosaic4.py
rename to luxonis_ml/data/augmentations/mosaic.py
index 395b69a9..0556b3f4 100644
--- a/luxonis_ml/data/utils/aug_utils/mosaic4.py
+++ b/luxonis_ml/data/augmentations/mosaic.py
@@ -1,53 +1,66 @@
+from typing import Any, Dict, List, Optional, Tuple, Union
+
 import numpy as np
-from typing import Tuple, Dict, Any, List, Optional
+from albumentations import BoxType, KeypointType
+from albumentations.core.bbox_utils import (
+    denormalize_bbox,
+    normalize_bbox,
+)
 from albumentations.core.transforms_interface import (
     BoxInternalType,
     ImageColorType,
     KeypointInternalType,
 )
-from albumentations.core.bbox_utils import (
-    denormalize_bbox,
-    normalize_bbox,
-)
+
 from .batch_transform import BatchBasedTransform
+from .utils import AUGMENTATIONS


+@AUGMENTATIONS.register_module()
 class Mosaic4(BatchBasedTransform):
-    """Mosaic augmentation arranges randomly selected four images into single image in a 2x2 grid layout.
-    The input images should have the same number of channels but can have different widths and heights.
-    The output is cropped around the intersection point of the four images with the size (out_with x out_height).
-    If the mosaic image is smaller than with x height, the gap is filled by the fill_value.
-    Args:
-        out_height (int)): output image height.
-            The mosaic image is cropped by this height around the mosaic center.
-            If the size of the mosaic image is smaller than this value the gap is filled by the `value`.
-        out_width (int): output image width.
-            The mosaic image is cropped by this height around the mosaic center.
-            If the size of the mosaic image is smaller than this value the gap is filled by the `value`.
-        value (int, float, list of ints, list of float): padding value. Default 0 (None).
-        replace (bool): whether to allow replacement in sampling or not. When the value is `True`, the same image
-            can be selected multiple times. When False, the batch size of the input should be at least four.
-        out_batch_size(int): output batch size. If the replace = False,
-            the input batch size should be 4 * out_batch_size.
-        mask_value (int, float, list of ints, list of float): padding value for masks. Default 0 (None).
-    Targets:
-        image_batch, mask_batch, bboxes_batch
-    [Bochkovskiy] Bochkovskiy A, Wang CY, Liao HYM. (2020) "YOLOv 4 : Optimal speed and accuracy of object detection.",
-    https://arxiv.org/pdf/2004.10934.pdf
-    """
-
     def __init__(
         self,
-        out_height,
-        out_width,
-        value=None,
-        replace=True,
-        out_batch_size=1,
-        mask_value=None,
-        always_apply=False,
-        p=0.5,
+        out_height: int,
+        out_width: int,
+        value: Optional[Union[int, float, List[int], List[float]]] = None,
+        out_batch_size: int = 1,
+        mask_value: Optional[Union[int, float, List[int], List[float]]] = None,
+        always_apply: bool = False,
+        p: float = 0.5,
     ):
-        super().__init__(always_apply=always_apply, p=p)
+        """Mosaic augmentation arranges four selected images into a single image in a
+        2x2 grid layout. This is done in a deterministic way, meaning the first image
+        in the batch is always placed in the top left. The input images should have the
+        same number of channels but can have different widths and heights. The output
+        is cropped around the intersection point of the four images with the size
+        (out_width x out_height).
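To make the batch bookkeeping of this transform concrete, a quick sketch (the numbers below are illustrative assumptions, not part of the patch):

```python
# Mosaic4 consumes n_tiles = 4 input images per output mosaic, so producing
# out_batch_size mosaics requires at least 4 * out_batch_size images in the
# incoming batch.
n_tiles, out_batch_size = 4, 2
required_batch = n_tiles * out_batch_size  # 8 images in -> 2 mosaics out
```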
+        If the mosaic image is smaller than out_width x out_height, the gap is filled
+        by the fill_value.
+
+        @param out_height: Output image height. The mosaic image is cropped by this height around the mosaic center.
+        If the size of the mosaic image is smaller than this value the gap is filled by the `value`.
+        @type out_height: int
+
+        @param out_width: Output image width. The mosaic image is cropped by this width around the mosaic center.
+        If the size of the mosaic image is smaller than this value the gap is filled by the `value`.
+        @type out_width: int
+
+        @param value: Padding value. Defaults to None.
+        @type value: Optional[Union[int, float, List[int], List[float]]], optional
+
+        @param out_batch_size: Number of output images in the batch. Defaults to 1.
+        @type out_batch_size: int, optional
+
+        @param mask_value: Padding value for masks. Defaults to None.
+        @type mask_value: Optional[Union[int, float, List[int], List[float]]], optional
+
+        @param always_apply: Whether to always apply the transform. Defaults to False.
+        @type always_apply: bool, optional
+
+        @param p: Probability of applying the transform. Defaults to 0.5.
+        @type p: float, optional
+        """
+
+        super().__init__(batch_size=4, always_apply=always_apply, p=p)

         if out_height <= 0:
             raise ValueError(f"out_height should be larger than 0, got {out_height}")
@@ -58,15 +71,19 @@ def __init__(
             f"out_batch_size should be larger than 0, got {out_batch_size}"
         )

-        self.n_tiles = 4  # 2x2
+        self.n_tiles = self.batch_size  # 4: 2x2
         self.out_height = out_height
         self.out_width = out_width
-        self.replace = replace
         self.value = value
         self.mask_value = mask_value
         self.out_batch_size = out_batch_size

     def get_transform_init_args_names(self) -> Tuple[str, ...]:
+        """Gets the default arguments for the mosaic augmentation.
+
+        @return: The string keywords of the arguments.
+        @rtype: Tuple[str, ...]
+        """
         return (
             "out_height",
             "out_width",
@@ -78,9 +95,28 @@ def get_transform_init_args_names(self) -> Tuple[str, ...]:

     @property
     def targets_as_params(self):
+        """List of augmentation targets.
+
+        @return: Output list of augmentation targets.
+        @rtype: List[str]
+        """
         return ["image_batch"]

-    def apply_to_image_batch(self, image_batch, indices, **params):
+    def apply_to_image_batch(
+        self, image_batch: List[np.ndarray], indices: List[int], **params
+    ) -> List[np.ndarray]:
+        """Applies the transformation to a batch of images.
+
+        @param image_batch: Batch of input images to which the transformation is
+            applied.
+        @type image_batch: List[np.ndarray]
+        @param indices: Indices of images in the batch.
+        @type indices: List[int]
+        @param params: Additional parameters for the transformation.
+        @type params: Any
+        @return: List of transformed images.
+        @rtype: List[np.ndarray]
+        """
         output_batch = []
         for i_batch in range(self.out_batch_size):
             idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)]
@@ -89,7 +125,20 @@ def apply_to_image_batch(self, image_batch, indices, **params):
             output_batch.append(mosaiced)
         return output_batch

-    def apply_to_mask_batch(self, mask_batch, indices, **params):
+    def apply_to_mask_batch(
+        self, mask_batch: List[np.ndarray], indices: List[int], **params
+    ) -> List[np.ndarray]:
+        """Applies the transformation to a batch of masks.
+
+        @param mask_batch: Batch of input masks to which the transformation is applied.
+        @type mask_batch: List[np.ndarray]
+        @param indices: Indices of images in the batch.
+        @type indices: List[int]
+        @param params: Additional parameters for the transformation.
+        @type params: Any
+        @return: List of transformed masks.
+        @rtype: List[np.ndarray]
+        """
         output_batch = []
         for i_batch in range(self.out_batch_size):
             idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)]
@@ -100,7 +149,27 @@ def apply_to_mask_batch(self, mask_batch, indices, **params):
             output_batch.append(mosaiced)
         return output_batch

-    def apply_to_bboxes_batch(self, bboxes_batch, indices, image_shapes, **params):
+    def apply_to_bboxes_batch(
+        self,
+        bboxes_batch: List[BoxType],
+        indices: List[int],
+        image_shapes: List[Tuple[int, int]],
+        **params,
+    ) -> List[BoxType]:
+        """Applies the transformation to a batch of bboxes.
+
+        @param bboxes_batch: Batch of input bboxes to which the transformation is
+            applied.
+        @type bboxes_batch: List[BoxType]
+        @param indices: Indices of images in the batch.
+        @type indices: List[int]
+        @param image_shapes: Shapes of the input images in the batch.
+        @type image_shapes: List[Tuple[int, int]]
+        @param params: Additional parameters for the transformation.
+        @type params: Any
+        @return: List of transformed bboxes.
+        @rtype: List[BoxType]
+        """
         output_batch = []
         for i_batch in range(self.out_batch_size):
             idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)]
@@ -119,8 +188,26 @@ def apply_to_bboxes_batch(self, bboxes_batch, indices, image_shapes, **params):
         return output_batch

     def apply_to_keypoints_batch(
-        self, keyboints_batch, indices, image_shapes, **params
-    ):
+        self,
+        keypoints_batch: List[KeypointType],
+        indices: List[int],
+        image_shapes: List[Tuple[int, int]],
+        **params,
+    ) -> List[KeypointType]:
+        """Applies the transformation to a batch of keypoints.
+
+        @param keypoints_batch: Batch of input keypoints to which the transformation is
+            applied.
+        @type keypoints_batch: List[KeypointType]
+        @param indices: Indices of images in the batch.
+        @type indices: List[int]
+        @param image_shapes: Shapes of the input images in the batch.
+        @type image_shapes: List[Tuple[int, int]]
+        @param params: Additional parameters for the transformation.
+        @type params: Any
+        @return: List of transformed keypoints.
+        @rtype: List[KeypointType]
+        """
         output_batch = []
         for i_batch in range(self.out_batch_size):
             idx_chunk = indices[self.n_tiles * i_batch : self.n_tiles * (i_batch + 1)]
@@ -139,15 +226,22 @@ def apply_to_keypoints_batch(
         return output_batch

     def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        """Get parameters dependent on the targets.
+
+        @param params: Dictionary containing parameters.
+        @type params: Dict[str, Any]
+        @return: Dictionary containing parameters dependent on the targets.
+        @rtype: Dict[str, Any]
+        """
         image_batch = params["image_batch"]
         n = len(image_batch)
-        if not self.replace and self.n_tiles * self.out_batch_size > n:
+        if self.n_tiles * self.out_batch_size > n:
             raise ValueError(
-                f"If replace == False, the batch size (= {n}) should be larger than "
+                f"The batch size (= {n}) should be at least "
                 + f"{self.n_tiles} x out_batch_size (= {self.n_tiles * self.out_batch_size})"
             )
         indices = np.random.choice(
-            range(n), size=self.n_tiles * self.out_batch_size, replace=self.replace
+            range(n), size=self.n_tiles * self.out_batch_size, replace=False
         ).tolist()
         image_shapes = [tuple(image.shape[:2]) for image in image_batch]
         return {
@@ -168,11 +262,17 @@ def mosaic4(
     If the mosaic image is smaller than width x height, the gap is filled by the fill_value.
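The `apply_to_*_batch` methods above all walk `indices` in strides of `n_tiles`; a minimal sketch of that chunking with dummy values:

```python
n_tiles, out_batch_size = 4, 2
# sampled without replacement in get_params_dependent_on_targets
indices = [3, 0, 6, 1, 7, 2, 5, 4]
chunks = [indices[n_tiles * i : n_tiles * (i + 1)] for i in range(out_batch_size)]
assert chunks == [[3, 0, 6, 1], [7, 2, 5, 4]]  # one chunk per 2x2 mosaic
```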
     This implementation is based on YOLOv5 with some modification:
     https://github.com/ultralytics/yolov5/blob/932dc78496ca532a41780335468589ad7f0147f7/utils/datasets.py#L648
-    Args:
-        image_batch (List[np.ndarray]): image list. The length should be four. Each image can has different size.
-        height (int): Height of output mosaic image
-        width (int): Width of output mosaic image
-        value (int, float, list of ints, list of float): padding value
+
+    @param image_batch: Image list. The length should be four. Each image can have a different size.
+    @type image_batch: List[np.ndarray]
+    @param height: Height of output mosaic image
+    @type height: int
+    @param width: Width of output mosaic image
+    @type width: int
+    @param value: Padding value
+    @type value: Optional[ImageColorType]
+    @return: Final output image
+    @rtype: np.ndarray
     """
     N_TILES = 4
     if len(image_batch) != N_TILES:
@@ -271,13 +371,22 @@ def bbox_mosaic4(
     width: int,
 ) -> BoxInternalType:
     """Put the given bbox in one of the cells of the 2x2 grid.
-    Args:
-        bbox (tuple): A bounding box `(x_min, y_min, x_max, y_max)`.
-        rows (int): Height of input image that corresponds to one of the mosaic cells
-        cols (int): Width of input image that corresponds to one of the mosaic cells
-        position_index (int): Index of the mosaic cell. 0: top left, 1: top right, 2: bottom left, 3: bottom right
-        height (int): Height of output mosaic image
-        width (int): Width of output mosaic image
+
+    @param bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
+    @type bbox: BoxInternalType
+    @param rows: Height of input image that corresponds to one of the mosaic cells
+    @type rows: int
+    @param cols: Width of input image that corresponds to one of the mosaic cells
+    @type cols: int
+    @param position_index: Index of the mosaic cell. 0: top left, 1: top right, 2:
+        bottom left, 3: bottom right
+    @type position_index: int
+    @param height: Height of output mosaic image
+    @type height: int
+    @param width: Width of output mosaic image
+    @type width: int
+    @return: Transformed bbox
+    @rtype: BoxInternalType
     """
     bbox = denormalize_bbox(bbox, rows, cols)
     center_x = width // 2
@@ -313,13 +422,22 @@ def keypoint_mosaic4(
     width: int,
 ) -> KeypointInternalType:
     """Put the given keypoint in one of the cells of the 2x2 grid.
-    Args:
-        keypoint (tuple): A keypoint `(x, y, angle, scale)`.
-        rows (int): Height of input image that corresponds to one of the mosaic cells
-        cols (int): Width of input image that corresponds to one of the mosaic cells
-        position_index (int): Index of the mosaic cell. 0: top left, 1: top right, 2: bottom left, 3: bottom right
-        height (int): Height of output mosaic image
-        width (int): Width of output mosaic image
+
+    @param keypoint: A keypoint `(x, y, angle, scale)`.
+    @type keypoint: KeypointInternalType
+    @param rows: Height of input image that corresponds to one of the mosaic cells
+    @type rows: int
+    @param cols: Width of input image that corresponds to one of the mosaic cells
+    @type cols: int
+    @param position_index: Index of the mosaic cell.
0: top left, 1: top right, 2: + bottom left, 3: bottom right + @type position_index: int + @param height: Height of output mosaic image + @type height: int + @param width: Width of output mosaic image + @type width: int + @return: Transformed keypoint + @rtype: KeypointInternalType """ x, y, angle, scale = keypoint diff --git a/luxonis_ml/data/augmentations.py b/luxonis_ml/data/augmentations/utils.py similarity index 50% rename from luxonis_ml/data/augmentations.py rename to luxonis_ml/data/augmentations/utils.py index 59a597aa..76480173 100644 --- a/luxonis_ml/data/augmentations.py +++ b/luxonis_ml/data/augmentations/utils.py @@ -1,23 +1,14 @@ -import random -import warnings -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Tuple import albumentations as A import cv2 import numpy as np -from albumentations.core.bbox_utils import denormalize_bbox, normalize_bbox -from albumentations.core.transforms_interface import ( - BoxInternalType, - BoxType, - DualTransform, - KeypointInternalType, - KeypointType, -) -import luxonis_ml.data.utils.aug_utils as A_batched from luxonis_ml.enums import LabelType from luxonis_ml.utils.registry import Registry +from .batch_compose import BatchCompose, ForEach +from .batch_transform import BatchBasedTransform AUGMENTATIONS = Registry(name="augmentations") @@ -40,7 +31,7 @@ def _parse_cfg( image_size: List[int], augmentations: List[Dict[str, Any]], keep_aspect_ratio: bool = True, - ) -> Tuple[A_batched.BatchCompose, A.Compose]: + ) -> Tuple[BatchCompose, A.Compose]: """Parses provided config and returns Albumentations BatchedCompose object and Compose object for default transforms. @@ -51,7 +42,7 @@ def _parse_cfg( @type keep_aspect_ratio: bool @param keep_aspect_ratio: Whether should use resize that keeps aspect ratio of original image. 
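The parser below wires pixel, spatial, and batched transforms into a `BatchCompose`; as a rough sketch of what the assembled object could look like (the transform choices and parameter values here are assumptions for illustration, not taken from a real config):

```python
import albumentations as A
import numpy as np

from luxonis_ml.data.augmentations import Mosaic4
from luxonis_ml.data.augmentations.batch_compose import BatchCompose, ForEach

batch_transform = BatchCompose(
    [
        ForEach([A.HueSaturationValue()], p=1.0),  # per-image pixel transforms
        Mosaic4(out_height=416, out_width=416, p=1.0),  # batched transform
    ],
    bbox_params=A.BboxParams(
        format="coco",
        label_fields=["bboxes_classes_batch"],  # note the "_batch" suffix rule
    ),
)

data = {
    "image_batch": [np.zeros((100, 100, 3), dtype=np.uint8) for _ in range(4)],
    "bboxes_batch": [[] for _ in range(4)],
    "bboxes_classes_batch": [[] for _ in range(4)],
}
transformed = batch_transform(**data)  # four inputs -> one mosaicked output
```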
- @rtype: Tuple[A.BatchCompose, A.Compose] + @rtype: Tuple[BatchCompose, A.Compose] @return: Objects for batched and spatial transforms """ @@ -59,7 +50,9 @@ def _parse_cfg( # NOTE: Always perform Resize if keep_aspect_ratio: - resize = LetterboxResize(height=image_size[0], width=image_size[1]) + resize = AUGMENTATIONS.get("LetterboxResize")( + height=image_size[0], width=image_size[1] + ) else: resize = A.Resize(image_size[0], image_size[1]) @@ -73,16 +66,16 @@ def _parse_cfg( pixel_augs.append(curr_aug) elif isinstance(curr_aug, A.DualTransform): spatial_augs.append(curr_aug) - elif isinstance(curr_aug, A_batched.BatchBasedTransform): + elif isinstance(curr_aug, BatchBasedTransform): self.is_batched = True - self.aug_batch_size = max(self.aug_batch_size, curr_aug.n_tiles) + self.aug_batch_size = max(self.aug_batch_size, curr_aug.batch_size) batched_augs.append(curr_aug) # NOTE: always perform resize last spatial_augs.append(resize) - batch_transform = A_batched.BatchCompose( + batch_transform = BatchCompose( [ - A_batched.ForEach(pixel_augs), + ForEach(pixel_augs), *batched_augs, ], bbox_params=A.BboxParams( @@ -236,7 +229,8 @@ def prepare_img_annotations( @param ih: Input image height @type iw: int @param iw: Input image width - @rtype: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray] + @rtype: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, + np.ndarray, np.ndarray] @return: Annotations in albumentations format """ @@ -535,519 +529,3 @@ def __init__( AUGMENTATIONS.register_module(module=A.SmallestMaxSize) AUGMENTATIONS.register_module(module=A.Transpose) AUGMENTATIONS.register_module(module=A.VerticalFlip) - - -@AUGMENTATIONS.register_module() -class LetterboxResize(DualTransform): - def __init__( - self, - height: int, - width: int, - interpolation: int = cv2.INTER_LINEAR, - border_value: int = 0, - mask_value: int = 0, - always_apply: bool = False, - p: float = 1.0, - ): - """Augmentation to apply letterbox resizing to images. Also transforms masks, - bboxes and keypoints to correct shape. - - @param height: Desired height of the output. - @type height: int - @param width: Desired width of the output. - @type width: int - @param interpolation: Cv2 flag to specify interpolation used when resizing. - Defaults to cv2.INTER_LINEAR. - @type interpolation: int, optional - @param border_value: Padding value for images. Defaults to 0. - @type border_value: int, optional - @param mask_value: Padding value for masks. Defaults to 0. - @type mask_value: int, optional - @param always_apply: Whether to always apply the transform. Defaults to False. - @type always_apply: bool, optional - @param p: Probability of applying the transform. Defaults to 1.0. - @type p: float, optional - """ - - super().__init__(always_apply, p) - - if not (0 <= border_value <= 255): - raise ValueError("Border value must be in range [0,255].") - - if not (0 <= mask_value <= 255): - raise ValueError("Mask value must be in range [0,255].") - - self.height = height - self.width = width - self.interpolation = interpolation - self.border_value = border_value - self.mask_value = mask_value - - def update_params(self, params: Dict[str, Any], **kwargs) -> Dict[str, Any]: - """Updates augmentation parameters with the necessary metadata. - - @param params: The existing augmentation parameters dictionary. - @type params: Dict[str, Any] - @param kwargs: Additional keyword arguments to add the parameters. 
- @type kwargs: Any - @return: Updated dictionary containing the merged parameters. - @rtype: Dict[str, Any] - """ - - params = super().update_params(params, **kwargs) - - img_height = params["rows"] - img_width = params["cols"] - - ratio = min(self.height / img_height, self.width / img_width) - new_height = int(img_height * ratio) - new_width = int(img_width * ratio) - - # only supports center alignment - pad_top = (self.height - new_height) // 2 - pad_bottom = pad_top - - pad_left = (self.width - new_width) // 2 - pad_right = pad_left - - params.update( - { - "pad_top": pad_top, - "pad_bottom": pad_bottom, - "pad_left": pad_left, - "pad_right": pad_right, - } - ) - - return params - - def apply( - self, - img: np.ndarray, - pad_top: int, - pad_bottom: int, - pad_left: int, - pad_right: int, - **kwargs, - ) -> np.ndarray: - """Applies the letterbox augmentation to an image. - - @param img: Input image to which resize is applied. - @type img: np.ndarray - @param pad_top: Number of pixels to pad at the top. - @type pad_top: int - @param pad_bottom: Number of pixels to pad at the bottom. - @type pad_bottom: int - @param pad_left: Number of pixels to pad on the left. - @type pad_left: int - @param pad_right: Number of pixels to pad on the right. - @type pad_right: int - @param params: Additional parameters for the padding operation. - @type params: Any - @return: Image with applied letterbox resize. - @rtype: np.ndarray - """ - - resized_img = cv2.resize( - img, - (self.width - pad_left - pad_right, self.height - pad_top - pad_bottom), - interpolation=self.interpolation, - ) - img_out = cv2.copyMakeBorder( - resized_img, - pad_top, - pad_bottom, - pad_left, - pad_right, - cv2.BORDER_CONSTANT, - self.border_value, - ) - img_out = img_out.astype(img.dtype) - return img_out - - def apply_to_mask( - self, - img: np.ndarray, - pad_top: int, - pad_bottom: int, - pad_left: int, - pad_right: int, - **params, - ) -> np.ndarray: - """Applies letterbox augmentation to the input mask. - - @param img: Input mask to which resize is applied. - @type img: np.ndarray - @param pad_top: Number of pixels to pad at the top. - @type pad_top: int - @param pad_bottom: Number of pixels to pad at the bottom. - @type pad_bottom: int - @param pad_left: Number of pixels to pad on the left. - @type pad_left: int - @param pad_right: Number of pixels to pad on the right. - @type pad_right: int - @param params: Additional parameters for the padding operation. - @type params: Any - @return: Mask with applied letterbox resize. - @rtype: np.ndarray - """ - - resized_img = cv2.resize( - img, - (self.width - pad_left - pad_right, self.height - pad_top - pad_bottom), - interpolation=cv2.INTER_NEAREST, - ) - img_out = cv2.copyMakeBorder( - resized_img, - pad_top, - pad_bottom, - pad_left, - pad_right, - cv2.BORDER_CONSTANT, - self.mask_value, - ) - img_out = img_out.astype(img.dtype) - return img_out - - def apply_to_bbox( - self, - bbox: BoxInternalType, - pad_top: int, - pad_bottom: int, - pad_left: int, - pad_right: int, - **params, - ) -> BoxInternalType: - """Applies letterbox augmentation to the bounding box. - - @param img: Bounding box to which resize is applied. - @type img: BoxInternalType - @param pad_top: Number of pixels to pad at the top. - @type pad_top: int - @param pad_bottom: Number of pixels to pad at the bottom. - @type pad_bottom: int - @param pad_left: Number of pixels to pad on the left. - @type pad_left: int - @param pad_right: Number of pixels to pad on the right. 
- @type pad_right: int - @param params: Additional parameters for the padding operation. - @type params: Any - @return: Bounding box with applied letterbox resize. - @rtype: BoxInternalType - """ - - x_min, y_min, x_max, y_max = denormalize_bbox( - bbox, self.height - pad_top - pad_bottom, self.width - pad_left - pad_right - )[:4] - bbox = np.array( - [x_min + pad_left, y_min + pad_top, x_max + pad_left, y_max + pad_top] - ) - # clip bbox to image, ignoring padding - bbox = bbox.clip( - min=[pad_left, pad_top] * 2, - max=[self.width - pad_left, self.height - pad_top] * 2, - ).tolist() - return normalize_bbox(bbox, self.height, self.width) - - def apply_to_keypoint( - self, - keypoint: KeypointInternalType, - pad_top: int, - pad_bottom: int, - pad_left: int, - pad_right: int, - **params, - ) -> KeypointInternalType: - """Applies letterbox augmentation to the keypoint. - - @param img: Keypoint to which resize is applied. - @type img: KeypointInternalType - @param pad_top: Number of pixels to pad at the top. - @type pad_top: int - @param pad_bottom: Number of pixels to pad at the bottom. - @type pad_bottom: int - @param pad_left: Number of pixels to pad on the left. - @type pad_left: int - @param pad_right: Number of pixels to pad on the right. - @type pad_right: int - @param params: Additional parameters for the padding operation. - @type params: Any - @return: Keypoint with applied letterbox resize. - @rtype: KeypointInternalType - """ - - x, y, angle, scale = keypoint[:4] - scale_x = (self.width - pad_left - pad_right) / params["cols"] - scale_y = (self.height - pad_top - pad_bottom) / params["rows"] - new_x = (x * scale_x) + pad_left - new_y = (y * scale_y) + pad_top - # if keypoint is in the padding then set coordinates to -1 - out_keypoint = ( - new_x - if not self._out_of_bounds(new_x, pad_left, self.width - pad_left) - else -1, - new_y - if not self._out_of_bounds(new_y, pad_top, self.height - pad_top) - else -1, - angle, - scale * max(scale_x, scale_y), - ) - return out_keypoint - - def get_transform_init_args_names(self) -> Tuple[str, ...]: - """Gets the default arguments for the letterbox augmentation. - - @return: The string keywords of the arguments. - @rtype: Tuple[str, ...] - """ - - return ("height", "width", "interpolation", "border_value", "mask_value") - - def _out_of_bounds(self, value: float, min_limit: float, max_limit: float) -> bool: - """ "Check if the given value is outside the specified limits. - - @param value: The value to be checked. - @type value: float - @param min_limit: Minimum limit. - @type min_limit: float - @param max_limit: Maximum limit. - @type max_limit: float - @return: True if the value is outside the specified limits, False otherwise. - @rtype: bool - """ - return value < min_limit or value > max_limit - - -@AUGMENTATIONS.register_module(name="Mosaic4") -class DeterministicMosaic4(A_batched.Mosaic4): - def __init__( - self, - out_height: int, - out_width: int, - value: Optional[Union[int, float, List[int], List[float]]] = None, - replace: bool = False, - out_batch_size: int = 1, - mask_value: Optional[Union[int, float, List[int], List[float]]] = None, - always_apply: bool = False, - p: float = 0.5, - ): - """Mosaic augmentation arranges selected four images into single image in a 2x2 - grid layout. This is done in deterministic way meaning first image in the batch - will always be in top left. The input images should have the same number of - channels but can have different widths and heights. 
The output is cropped around - the intersection point of the four images with the size (out_with x out_height). - If the mosaic image is smaller than with x height, the gap is filled by the - fill_value. - - @param out_height: Output image height. The mosaic image is cropped by this height around the mosaic center. - If the size of the mosaic image is smaller than this value the gap is filled by the `value`. - @type out_height: int - - @param out_width: Output image width. The mosaic image is cropped by this height around the mosaic center. - If the size of the mosaic image is smaller than this value the gap is filled by the `value`. - @type out_width: int - - @param value: Padding value. Defaults to None. - @type value: Optional[Union[int, float, List[int], List[float]]], optional - - @param replace: Whether to replace the original images in the mosaic. Current implementation - only supports this set to False. Defaults to False. - @type replace: bool, optional - - @param out_batch_size: Number of output images in the batch. Defaults to 1. - @type out_batch_size: int, optional - - @param mask_value: Padding value for masks. Defaults to None. - @type mask_value: Optional[Union[int, float, List[int], List[float]]], optional - - @param always_apply: Whether to always apply the transform. Defaults to False. - @type always_apply: bool, optional - - @param p: Probability of applying the transform. Defaults to 0.5. - @type p: float, optional - """ - - super().__init__( - out_height, - out_width, - value, - replace, - out_batch_size, - mask_value, - always_apply, - p, - ) - warnings.warn( - "Only deterministic version of Mosaic4 is available, setting replace=False." - ) - self.replace = False - - def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]: - """Get parameters dependent on the targets. - - @param params: Dictionary containing parameters. - @type params: Dict[str, Any] - @return: Dictionary containing parameters dependent on the targets. - @rtype: Dict[str, Any] - """ - target_params = super().get_params_dependent_on_targets(params) - target_params["indices"] = list(range(self.n_tiles)) - return target_params - - -@AUGMENTATIONS.register_module() -class MixUp(A_batched.BatchBasedTransform): - def __init__( - self, - alpha: Union[float, Tuple[float, float]] = 0.5, - always_apply: bool = False, - p: float = 0.5, - ): - """MixUp augmentation that merges two images and their annotations into one. If - images are not of same size then second one is first resized to match the first - one. - - @param alpha: Mixing coefficient, either a single float or a tuple representing - the range. Defaults to 0.5. - @type alpha: Union[float, Tuple[float, float]], optional - @param always_apply: Whether to always apply the transform. Defaults to False. - @type always_apply: bool, optional - @param p: Probability of applying the transform. Defaults to 0.5. - @type p: float, optional - """ - super().__init__(always_apply=always_apply, p=p) - - self.alpha = alpha - self.n_tiles = 2 - self.out_batch_size = 1 - - def get_transform_init_args_names(self) -> Tuple[str, ...]: - """Gets the default arguments for the mixup augmentation. - - @return: The string keywords of the arguments. - @rtype: Tuple[str, ...] - """ - return ("alpha", "out_batch_size") - - @property - def targets_as_params(self) -> List[str]: - """List of augmentation targets. - - @return: Output list of augmentation targets. 
- @rtype: List[str] - """ - return ["image_batch"] - - def apply_to_image_batch( - self, - image_batch: List[np.ndarray], - image_shapes: List[Tuple[int, int]], - **params, - ) -> List[np.ndarray]: - """Applies the transformation to a batch of images. - - @param image_batch: Batch of input images to which the transformation is - applied. - @type image_batch: List[np.ndarray] - @param image_shapes: Shapes of the input images in the batch. - @type image_shapes: List[Tuple[int, int]] - @param params: Additional parameters for the transformation. - @type params: Any - @return: List of transformed images. - @rtype: List[np.ndarray] - """ - image1 = image_batch[0] - # resize second image to size of the first one - image2 = cv2.resize(image_batch[1], (image_shapes[0][1], image_shapes[0][0])) - - if isinstance(self.alpha, float): - curr_alpha = np.clip(self.alpha, 0, 1) - else: - curr_alpha = random.uniform(max(self.alpha[0], 0), min(self.alpha[1], 1)) - img_out = cv2.addWeighted(image1, curr_alpha, image2, 1 - curr_alpha, 0.0) - return [img_out] - - def apply_to_mask_batch( - self, - mask_batch: List[np.ndarray], - image_shapes: List[Tuple[int, int]], - **params, - ) -> List[np.ndarray]: - """Applies the transformation to a batch of masks. - - @param image_batch: Batch of input masks to which the transformation is applied. - @type image_batch: List[np.ndarray] - @param image_shapes: Shapes of the input images in the batch. - @type image_shapes: List[Tuple[int, int]] - @param params: Additional parameters for the transformation. - @type params: Any - @return: List of transformed masks. - @rtype: List[np.ndarray] - """ - mask1 = mask_batch[0] - mask2 = cv2.resize( - mask_batch[1], - (image_shapes[0][1], image_shapes[0][0]), - interpolation=cv2.INTER_NEAREST, - ) - out_mask = mask1 + mask2 - # if masks intersect keep one present in first image - mask_inter = mask1 > 0 - out_mask[mask_inter] = mask1[mask_inter] - return [out_mask] - - def apply_to_bboxes_batch( - self, bboxes_batch: List[BoxType], image_shapes: List[Tuple[int, int]], **params - ) -> List[BoxType]: - """Applies the transformation to a batch of bboxes. - - @param image_batch: Batch of input bboxes to which the transformation is - applied. - @type image_batch: List[BoxType] - @param image_shapes: Shapes of the input images in the batch. - @type image_shapes: List[Tuple[int, int]] - @param params: Additional parameters for the transformation. - @type params: Any - @return: List of transformed bboxes. - @rtype: List[BoxType] - """ - return [bboxes_batch[0] + bboxes_batch[1]] - - def apply_to_keypoints_batch( - self, - keypoints_batch: List[KeypointType], - image_shapes: List[Tuple[int, int]], - **params, - ) -> List[KeypointType]: - """Applies the transformation to a batch of keypoints. - - @param image_batch: Batch of input keypoints to which the transformation is - applied. - @type image_batch: List[BoxType] - @param image_shapes: Shapes of the input images in the batch. - @type image_shapes: List[Tuple[int, int]] - @param params: Additional parameters for the transformation. - @type params: Any - @return: List of transformed keypoints. 
- @rtype: List[BoxType] - """ - scaled_kpts2 = [] - scale_x = image_shapes[0][1] / image_shapes[1][1] - scale_y = image_shapes[0][0] / image_shapes[1][0] - for kpt in keypoints_batch[1]: - new_kpt = A.augmentations.geometric.functional.keypoint_scale( - keypoint=kpt, scale_x=scale_x, scale_y=scale_y - ) - scaled_kpts2.append(new_kpt + kpt[4:]) - return [keypoints_batch[0] + scaled_kpts2] - - def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]: - """Get parameters dependent on the targets. - - @param params: Dictionary containing parameters. - @type params: Dict[str, Any] - @return: Dictionary containing parameters dependent on the targets. - @rtype: Dict[str, Any] - """ - image_batch = params["image_batch"] - return {"image_shapes": [image.shape[:2] for image in image_batch]} diff --git a/luxonis_ml/data/utils/aug_utils/__init__.py b/luxonis_ml/data/utils/aug_utils/__init__.py deleted file mode 100644 index 16f11108..00000000 --- a/luxonis_ml/data/utils/aug_utils/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .batch_compose import BatchCompose, ForEach, Repeat -from .batch_transform import BatchBasedTransform -from .mosaic4 import Mosaic4 - -__all__ = ["BatchCompose", "ForEach", "Repeat", "BatchBasedTransform", "Mosaic4"] From d1e8ef972788861a7de95ebc158f6278536ee6a8 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Mon, 8 Jan 2024 13:19:03 +0000 Subject: [PATCH 05/10] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 1d809e79..e123fe5b 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 10% - 10% + 9% + 9% From 1cd2f335c82111e8b0b95a5f17b35b27f66866e0 Mon Sep 17 00:00:00 2001 From: klemen1999 Date: Mon, 8 Jan 2024 14:54:33 +0100 Subject: [PATCH 06/10] fixed docstrings --- .../data/augmentations/batch_compose.py | 106 +++++++----------- luxonis_ml/data/augmentations/batch_utils.py | 44 +------- 2 files changed, 46 insertions(+), 104 deletions(-) diff --git a/luxonis_ml/data/augmentations/batch_compose.py b/luxonis_ml/data/augmentations/batch_compose.py index 78473ae0..d289f6e4 100644 --- a/luxonis_ml/data/augmentations/batch_compose.py +++ b/luxonis_ml/data/augmentations/batch_compose.py @@ -20,25 +20,6 @@ class Compose(BaseCompose): - """Compose transforms and handle all transformations regarding bounding boxes. - - @param transforms: List of transformations to compose - @type transforms: TransformsSeqType - @param bboxparams: Parameters for bounding boxes transforms. Defaults to None. - @type bboxparams: Optional[Union[dict, BboxParams]] - @param keypoint_params: Parameters for keypoint transforms. Defaults to None. - @type keypoint_params: Optional[Union[dict, KeypointParams]] - @param additional_targets: Dict with keys - new target name, values - old target - name. ex: {'image2': 'image'}. Defaults to None. - @type additional_targets: Optional[Dict[str, str]] - @param p: Probability of applying all list of transforms. Defaults to 1.0. - @type p: float - @param is_check_shapes: If True shapes consistency of images/mask/masks would be checked on - each call. If you would like to disable this check - pass False (do it only if you are sure - in your data consistency). Defaults to True. 
- @type is_check_shapes: bool - """ - def __init__( self, transforms: TransformsSeqType, @@ -48,6 +29,24 @@ def __init__( p: float = 1.0, is_check_shapes: bool = True, ): + """Compose transforms and handle all transformations regarding bounding boxes. + + @param transforms: List of transformations to compose + @type transforms: TransformsSeqType + @param bboxparams: Parameters for bounding boxes transforms. Defaults to None. + @type bboxparams: Optional[Union[dict, BboxParams]] + @param keypoint_params: Parameters for keypoint transforms. Defaults to None. + @type keypoint_params: Optional[Union[dict, KeypointParams]] + @param additional_targets: Dict with keys - new target name, values - old target + name. ex: {'image2': 'image'}. Defaults to None. + @type additional_targets: Optional[Dict[str, str]] + @param p: Probability of applying all list of transforms. Defaults to 1.0. + @type p: float + @param is_check_shapes: If True shapes consistency of images/mask/masks would be checked on + each call. If you would like to disable this check - pass False (do it only if you are sure + in your data consistency). Defaults to True. + @type is_check_shapes: bool + """ super(Compose, self).__init__(transforms, p) self.processors: Dict[str, DataProcessor] = {} @@ -241,29 +240,6 @@ def _make_targets_contiguous(self, data: Dict[str, Any]) -> Dict[str, Any]: class BatchCompose(Compose): - """Compose designed to handle the multi-image transforms The contents can be a - subclass of `BatchBasedTransform` or other transforms enclosed by ForEach container. - All targets' names should have the suffix "_batch", ex ("image_batch", - "bboxes_batch"). Note this nameing rule is applied to the `label_fields` of the - `BboxParams` and the `KeypointsParams`. - - @param transforms: List of transformations to compose - @type transforms: TransformsSeqType - @param bboxparams: Parameters for bounding boxes transforms. Defaults to None. - @type bboxparams: Optional[Union[dict, BboxParams]] - @param keypoint_params: Parameters for keypoint transforms. Defaults to None. - @type keypoint_params: Optional[Union[dict, KeypointParams]] - @param additional_targets: Dict with keys - new target name, values - old target - name. ex: {'image2': 'image'}. Defaults to None. - @type additional_targets: Optional[Dict[str, str]] - @param p: Probability of applying all list of transforms. Defaults to 1.0. - @type p: float - @param is_check_shapes: If True shapes consistency of images/mask/masks would be checked on - each call. If you would like to disable this check - pass False (do it only if you are sure - in your data consistency). Defaults to True. - @type is_check_shapes: bool - """ - def __init__( self, transforms: TransformsSeqType, @@ -273,6 +249,28 @@ def __init__( p: float = 1.0, is_check_shapes: bool = True, ): + """Compose designed to handle the multi-image transforms The contents can be a + subclass of `BatchBasedTransform` or other transforms enclosed by ForEach + container. All targets' names should have the suffix "_batch", ex + ("image_batch", "bboxes_batch"). Note this nameing rule is applied to the + `label_fields` of the `BboxParams` and the `KeypointsParams`. + + @param transforms: List of transformations to compose + @type transforms: TransformsSeqType + @param bboxparams: Parameters for bounding boxes transforms. Defaults to None. + @type bboxparams: Optional[Union[dict, BboxParams]] + @param keypoint_params: Parameters for keypoint transforms. Defaults to None. 
+        @type keypoint_params: Optional[Union[dict, KeypointParams]]
+        @param additional_targets: Dict with keys - new target name, values - old target
+            name. ex: {'image2': 'image'}. Defaults to None.
+        @type additional_targets: Optional[Dict[str, str]]
+        @param p: Probability of applying all list of transforms. Defaults to 1.0.
+        @type p: float
+        @param is_check_shapes: If True shapes consistency of images/mask/masks would be checked on
+            each call. If you would like to disable this check - pass False (do it only if you are sure
+            in your data consistency). Defaults to True.
+        @type is_check_shapes: bool
+        """
         super(BatchCompose, self).__init__(
             transforms=transforms,
             bbox_params=bbox_params,
@@ -326,19 +324,7 @@ def _make_targets_contiguous(self, batched_data: Dict[str, Any]) -> Dict[str, An

 class ForEach(BaseCompose):
     """Apply transforms for each batch element. This expects batched input and can be
-    contained by the `BatchCompose`.
-
-    This can contains any other transforms but a subclass of the `BatchBasedTransform`.
-    Internally, this container works as the following way:
-    ```
-    for i in range(batch_size):
-        for t in transforms:
-            processed = t({"image": data["image_batch"][i], "bboxes": data["bboxes_batch"][i], ...})
-            data["image_batch"][i] = processed["image"]
-            ...
-    return data
-    ```
-    """
+    contained by the `BatchCompose`."""

     def __init__(self, transforms: TransformsSeqType, p: float = 0.5):
         super().__init__(transforms, p)
@@ -371,16 +357,6 @@ class Repeat(BaseCompose):
     This expects batched input and can be contained by the `BatchCompose`. The
     contained transforms should be a subclass of the `BatchBasedTransform`.
-    Internally, this container works as the following way:
-    ```
-    out_data = {"image_batch": [], ...}
-    for _ in range(n):
-        data = in_data
-        for t in transforms:
-            data = t(**data)
-        out_data["image_batch"] += data["image_batch"]
-        ...
-    return out_data
-    ```

     Note: This class assumes that each transform does not modify the input data.
     """
diff --git a/luxonis_ml/data/augmentations/batch_utils.py b/luxonis_ml/data/augmentations/batch_utils.py
index 1206bdf8..126d1c28 100644
--- a/luxonis_ml/data/augmentations/batch_utils.py
+++ b/luxonis_ml/data/augmentations/batch_utils.py
@@ -2,16 +2,7 @@

 def batch2list(data: Dict[str, List]) -> List[Dict[str, Any]]:
-    """Convert from a batched target dict to list of normal target dicts
-    ex:
-    {"image_batch": image_batch, "bboxes_batch": bboxes_batch, ...}
-    =>
-    [
-    {"image": image_batch[0], "bboxes": bboxes_batch[0], ...},
-    {"image": image_batch[1], "bboxes": bboxes_batch[1], ...},
-    ...
-    ]
-    """
+    """Convert from a batched target dict to a list of normal target dicts."""
     if "image_batch" not in data:
         raise ValueError("Batch-based transform should have `image_batch` target")
     batch_size = len(data["image_batch"])
@@ -30,16 +21,7 @@ def batch2list(data: Dict[str, List]) -> List[Dict[str, Any]]:

 def list2batch(data: List[Dict[str, Any]]) -> Dict[str, List]:
-    """Convert from a list of normal target dicts to a batched target dict
-    ex:
-    [
-    {"image": image_batch[0], "bboxes": bboxes_batch[0], ...},
-    {"image": image_batch[1], "bboxes": bboxes_batch[1], ...},
-    ...
-    ]
-    =>
-    {"image_batch": image_batch, "bboxes_batch": bboxes_batch, ...}
-    """
+    """Convert from a list of normal target dicts to a batched target dict."""

     if len(data) == 0:
         raise ValueError("The input should have at least one item.")
@@ -56,10 +38,7 @@ def list2batch(data: List[Dict[str, Any]]) -> Dict[str, List]:

 def to_unbatched_name(batched_name: str) -> str:
     """Get a normal target name from a batched target name. If the given name does not
-    have the "_batch" suffix, a ValueError is raised.
-
-    ex. `abc_batch --> abc`
-    """
+    have the "_batch" suffix, a ValueError is raised."""
     if not batched_name.endswith("_batch"):
         raise ValueError(
             f"Batched target name must have '_batch' suffix, got `{batched_name}`"
@@ -68,10 +47,7 @@ def to_unbatched_name(batched_name: str) -> str:

 def to_batched_name(name: str) -> str:
     """Get a batched target name from a normal target name. If the given name already
-    has the "_batch" suffix, a ValueError is raised.
-
-    ex. `abc --> abc_batch`
-    """
+    has the "_batch" suffix, a ValueError is raised."""

     if name.endswith("_batch"):
         raise ValueError(
@@ -82,17 +58,7 @@ def to_batched_name(name: str) -> str:

 def concat_batches(batches: List[Dict[str, List]]) -> Dict[str, List]:
-    """Concatenate batched targets
-    ex:
-    [
-    {"image_batch": image_batch1, "bboxes_batch": bboxes_batch1, ...}
-    {"image_batch": image_batch1, "bboxes_batch": bboxes_batch1, ...}
-    ]
-    =>
-    {
-    "image_batch": image_batch1 + image_batch2, "bboxes_batch": bboxes_batch1 + bboxes_batch2, ...
-    }
-    """
+    """Concatenate batched targets."""

     n_batches = len(batches)
     if n_batches == 0:
From d858f5a21d3f356ded9cbd857541c6ea06e15fc4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?=
Date: Mon, 8 Jan 2024 15:20:08 +0100
Subject: [PATCH 07/10] Update requirements.txt

---
 luxonis_ml/data/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/luxonis_ml/data/requirements.txt b/luxonis_ml/data/requirements.txt
index 5784644e..cb535adb 100644
--- a/luxonis_ml/data/requirements.txt
+++ b/luxonis_ml/data/requirements.txt
@@ -6,7 +6,7 @@ PyYAML>=6.0
 label-studio-sdk>=0.0.28
 # boto3>=1.17.39
 # google-cloud-storage>=2.10.0
-albumentations==1.3.1
+albumentations>=1.3.1
 pandas>=2.0.0
 pyarrow>=13.0.0
 pycocotools>=2.0.7
From ccc5cdfa2ebc65ed1e407fc9b7b754d940f02e81 Mon Sep 17 00:00:00 2001
From: klemen1999
Date: Mon, 8 Jan 2024 23:58:40 +0100
Subject: [PATCH 08/10] moved custom augmentations to separate dir

---
 luxonis_ml/data/augmentations/__init__.py             | 4 +---
 luxonis_ml/data/augmentations/custom/__init__.py      | 5 +++++
 .../data/augmentations/{ => custom}/letterbox_resize.py | 2 +-
 luxonis_ml/data/augmentations/{ => custom}/mixup.py   | 4 ++--
 luxonis_ml/data/augmentations/{ => custom}/mosaic.py  | 4 ++--
 5 files changed, 11 insertions(+), 8 deletions(-)
 create mode 100644 luxonis_ml/data/augmentations/custom/__init__.py
 rename luxonis_ml/data/augmentations/{ => custom}/letterbox_resize.py (99%)
 rename luxonis_ml/data/augmentations/{ => custom}/mixup.py (98%)
 rename luxonis_ml/data/augmentations/{ => custom}/mosaic.py (99%)

diff --git a/luxonis_ml/data/augmentations/__init__.py b/luxonis_ml/data/augmentations/__init__.py
index 18408eab..c771e20b 100644
--- a/luxonis_ml/data/augmentations/__init__.py
+++ b/luxonis_ml/data/augmentations/__init__.py
@@ -1,6 +1,4 @@
-from .letterbox_resize import LetterboxResize
-from .mixup import MixUp
-from .mosaic import Mosaic4
+from .custom import LetterboxResize, MixUp, Mosaic4
 from .utils import Augmentations,
TrainAugmentations, ValAugmentations __all__ = [ diff --git a/luxonis_ml/data/augmentations/custom/__init__.py b/luxonis_ml/data/augmentations/custom/__init__.py new file mode 100644 index 00000000..29563f03 --- /dev/null +++ b/luxonis_ml/data/augmentations/custom/__init__.py @@ -0,0 +1,5 @@ +from .letterbox_resize import LetterboxResize +from .mixup import MixUp +from .mosaic import Mosaic4 + +__all__ = ["LetterboxResize", "MixUp", "Mosaic4"] diff --git a/luxonis_ml/data/augmentations/letterbox_resize.py b/luxonis_ml/data/augmentations/custom/letterbox_resize.py similarity index 99% rename from luxonis_ml/data/augmentations/letterbox_resize.py rename to luxonis_ml/data/augmentations/custom/letterbox_resize.py index 1dbd2b37..ccdad265 100644 --- a/luxonis_ml/data/augmentations/letterbox_resize.py +++ b/luxonis_ml/data/augmentations/custom/letterbox_resize.py @@ -5,7 +5,7 @@ from albumentations import BoxType, DualTransform, KeypointType from albumentations.core.bbox_utils import denormalize_bbox, normalize_bbox -from .utils import AUGMENTATIONS +from ..utils import AUGMENTATIONS @AUGMENTATIONS.register_module() diff --git a/luxonis_ml/data/augmentations/mixup.py b/luxonis_ml/data/augmentations/custom/mixup.py similarity index 98% rename from luxonis_ml/data/augmentations/mixup.py rename to luxonis_ml/data/augmentations/custom/mixup.py index 204d4fe8..b788c705 100644 --- a/luxonis_ml/data/augmentations/mixup.py +++ b/luxonis_ml/data/augmentations/custom/mixup.py @@ -6,8 +6,8 @@ import numpy as np from albumentations import BoxType, KeypointType -from .batch_transform import BatchBasedTransform -from .utils import AUGMENTATIONS +from ..batch_transform import BatchBasedTransform +from ..utils import AUGMENTATIONS @AUGMENTATIONS.register_module() diff --git a/luxonis_ml/data/augmentations/mosaic.py b/luxonis_ml/data/augmentations/custom/mosaic.py similarity index 99% rename from luxonis_ml/data/augmentations/mosaic.py rename to luxonis_ml/data/augmentations/custom/mosaic.py index 0556b3f4..62a5a6eb 100644 --- a/luxonis_ml/data/augmentations/mosaic.py +++ b/luxonis_ml/data/augmentations/custom/mosaic.py @@ -12,8 +12,8 @@ KeypointInternalType, ) -from .batch_transform import BatchBasedTransform -from .utils import AUGMENTATIONS +from ..batch_transform import BatchBasedTransform +from ..utils import AUGMENTATIONS @AUGMENTATIONS.register_module() From c5a4c624f90bd7d8d44881c4bf123aba62370bae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 9 Jan 2024 00:17:22 +0100 Subject: [PATCH 09/10] Update batch_transform.py --- luxonis_ml/data/augmentations/batch_transform.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/luxonis_ml/data/augmentations/batch_transform.py b/luxonis_ml/data/augmentations/batch_transform.py index 70e7510c..a5dd32ed 100644 --- a/luxonis_ml/data/augmentations/batch_transform.py +++ b/luxonis_ml/data/augmentations/batch_transform.py @@ -9,8 +9,6 @@ class BatchBasedTransform(BasicTransform): - """.""" - def __init__(self, batch_size: int, **kwargs): """Transform for multi-image. 
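For illustration, a minimal hypothetical subclass of this interface; it only handles the image target, so it is a toy sketch rather than a complete transform:

```python
from typing import Any, List

import numpy as np

from luxonis_ml.data.augmentations.batch_transform import BatchBasedTransform


class InvertBatch(BatchBasedTransform):
    def __init__(self, **kwargs: Any):
        # operates on each image independently, so one image per batch element
        super().__init__(batch_size=1, **kwargs)

    def apply_to_image_batch(
        self, image_batch: List[np.ndarray], **params: Any
    ) -> List[np.ndarray]:
        # invert every uint8 image in the batch
        return [255 - img for img in image_batch]
```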
@@ -33,7 +31,7 @@ def targets(self) -> Dict[str, Callable]: } def update_params(self, params: Dict[str, Any], **kwargs) -> Dict[str, Any]: - # This overwrites the `supre().update_params(...)` + # This overwrites the `super().update_params(...)` return params def apply_to_image_batch( From 576f636e2ec50c9e608a419d32cd1572afbf12ac Mon Sep 17 00:00:00 2001 From: klemen1999 Date: Tue, 9 Jan 2024 09:15:43 +0100 Subject: [PATCH 10/10] added docstrings to batch_processors --- .../data/augmentations/batch_processors.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/luxonis_ml/data/augmentations/batch_processors.py b/luxonis_ml/data/augmentations/batch_processors.py index 6bf33b4c..88ffa756 100644 --- a/luxonis_ml/data/augmentations/batch_processors.py +++ b/luxonis_ml/data/augmentations/batch_processors.py @@ -12,6 +12,14 @@ class BboxBatchProcessor(DataProcessor): def __init__( self, params: BboxParams, additional_targets: Optional[Dict[str, str]] = None ): + """Data processor class to process bbox data in batches. + + @param params: Bbox parameters + @type params: BboxParams + @param additional_targets: Additional targets of the transform. Defaults to + None. + @type additional_targets: Optional[Dict[str, str]] + """ super().__init__(params, additional_targets) item_params = copy.deepcopy(params) if item_params.label_fields is not None: @@ -78,6 +86,14 @@ def __init__( params: KeypointParams, additional_targets: Optional[Dict[str, str]] = None, ): + """Data processor class to process keypoint data in batches. + + @param params: Keypoint parameters + @type params: KeypointParams + @param additional_targets: Additional targets of the transform. Defaults to + None. + @type additional_targets: Optional[Dict[str, str]] + """ super().__init__(params, additional_targets) item_params = copy.deepcopy(params) if item_params.label_fields is not None:
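To round out the picture, the `_batch` naming convention that the processors and containers above rely on can be exercised directly through the helpers in `batch_utils`; a minimal sketch with dummy data:

```python
import numpy as np

from luxonis_ml.data.augmentations.batch_utils import batch2list, list2batch

data = {
    "image_batch": [np.zeros((2, 2, 3)), np.ones((2, 2, 3))],
    "bboxes_batch": [[(0.0, 0.0, 0.5, 0.5, 0)], []],
}

items = batch2list(data)  # two per-image target dicts
assert items[0].keys() == {"image", "bboxes"}

# the round-trip restores the batched dict with its "_batch"-suffixed keys
assert list2batch(items).keys() == data.keys()
```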