Working orientation change, next add config

stefanklut · Nov 30, 2023 · be859f8 · be859f8
1 parent 5659996
commit be859f8
Show file tree

Hide file tree

Showing 2 changed files with 87 additions and 9 deletions.
diff --git a/datasets/augmentations.py b/datasets/augmentations.py
@@ -10,6 +10,7 @@
 import detectron2.data.transforms as T
 import numpy as np
 from detectron2.config import CfgNode
+from fvcore.transforms.transform import Transform
 
 sys.path.append(str(Path(__file__).resolve().parent.joinpath("..")))
 from scipy.ndimage import gaussian_filter
@@ -20,6 +21,7 @@
     GaussianFilterTransform,
     GrayscaleTransform,
     HFlipTransform,
+    OrientationTransform,
     ResizeTransform,
     VFlipTransform,
     WarpFieldTransform,
@@ -313,7 +315,7 @@ def __init__(
         r_kappa: float = 30,
         sh_kappa: float = 20,
         sc_stdv: float = 0.12,
-        probs: Optional[Sequence[float]] = None,
+        probabilities: Optional[Sequence[float]] = None,
     ) -> None:
         """
         Apply a random affine transformation to the image
@@ -323,17 +325,17 @@ def __init__(
             r_kappa (float, optional): kappa value used for sampling the rotation. Defaults to 30.
             sh_kappa (float, optional): kappa value used for sampling the shear.. Defaults to 20.
             sc_stdv (float, optional): standard deviation used for the scale. Defaults to 0.12.
-            probs (Optional[Sequence[float]], optional): individual probabilities for each sub category of an affine transformation. When None is given default to all 1.0 Defaults to None.
+            probabilities (Optional[Sequence[float]], optional): individual probabilities for each sub category of an affine transformation. When None is given default to all 1.0 Defaults to None.
         """
         super().__init__()
         self.t_stdv = t_stdv
         self.r_kappa = r_kappa
         self.sh_kappa = sh_kappa
         self.sc_stdv = sc_stdv
 
-        if probs is not None:
-            assert len(probs) == 4, f"{len(probs)}: {probs}"
-            self.probs = probs
+        if probabilities is not None:
+            assert len(probabilities) == 4, f"{len(probabilities)}: {probabilities}"
+            self.probs = probabilities
         else:
             self.probs = [1.0] * 4
 
@@ -702,6 +704,21 @@ def get_transform(self, image):
         return BlendTransform(src_image=np.asarray(0).astype(np.float32), src_weight=1 - w, dst_weight=w)
 
 
+class RandomOrientation(T.Augmentation):
+    """
+    Randomly rotate the image by a multiple of 90 degrees
+    """
+
+    def __init__(self, orientation_percentages: Optional[list[float | int]] = None) -> None:
+        """
+        Randomly rotate the image by a multiple of 90 degrees
+
+        Args:
+            orientation_percentages (Optional[list[float | int]], optional): relative weights for 0, 1, 2 and 3 turns of 90 degrees (in that order), these are normalized to sum to 1. When None is given default to all 1.0. Defaults to None.
+
+        Raises:
+            ValueError: if a weight is negative, or the weights do not sum to a finite positive value
+        """
+        super().__init__()
+        if orientation_percentages is None:
+            orientation_percentages = [1.0] * 4
+        array_percentages = np.asarray(orientation_percentages, dtype=float)
+        assert len(array_percentages) == 4, f"{len(array_percentages)}: {array_percentages}"
+
+        # Validate here instead of failing later (with NaN probabilities) inside np.random.choice
+        total = np.sum(array_percentages)
+        if np.any(array_percentages < 0) or not np.isfinite(total) or total <= 0:
+            raise ValueError(
+                f"Orientation percentages must be non-negative and sum to a positive value: {array_percentages}"
+            )
+
+        normalized_percentages = array_percentages / total
+        self.orientation_percentages = normalized_percentages
+
+    def get_transform(self, image) -> Transform:
+        """
+        Sample the number of 90 degree turns and build the matching transform
+
+        Args:
+            image (np.ndarray): input image, the first two dimensions are used as height and width
+
+        Returns:
+            Transform: OrientationTransform for the sampled number of turns
+        """
+        times_90_degrees = int(np.random.choice(4, p=self.orientation_percentages))
+        return OrientationTransform(times_90_degrees, image.shape[0], image.shape[1])
+
+
 def build_augmentation(cfg: CfgNode, is_train: bool) -> list[T.Augmentation | T.Transform]:
     """
     Function to generate all the augmentations used in the inference and training process
@@ -822,7 +839,7 @@ def build_augmentation(cfg: CfgNode, is_train: bool) -> list[T.Augmentation | T.
                 r_kappa=cfg.INPUT.AFFINE.ROTATION.KAPPA,
                 sh_kappa=cfg.INPUT.AFFINE.SHEAR.KAPPA,
                 sc_stdv=cfg.INPUT.AFFINE.SCALE.STANDARD_DEVIATION,
-                probs=(
+                probabilities=(
                     cfg.INPUT.AFFINE.TRANSLATION.PROBABILITY,
                     cfg.INPUT.AFFINE.ROTATION.PROBABILITY,
                     cfg.INPUT.AFFINE.SHEAR.PROBABILITY,
@@ -883,13 +900,14 @@ def test(args) -> None:
     contrast = RandomContrast()
     brightness = RandomBrightness()
     saturation = RandomSaturation()
+    orientation = RandomOrientation(orientation_percentages=[0, 0, 0, 1])
 
     augs = []
 
     # augs = T.AugmentationList([resize, elastic, affine])
 
-    augs.append(resize)
-    augs.append(elastic)
+    # augs.append(resize)
+    # augs.append(elastic)
     # augs.append(grayscale)
     # augs.append(contrast)
     # augs.append(brightness)
@@ -900,6 +918,7 @@ def test(args) -> None:
     # augs.append(rotation)
     # augs.append(shear)
     # augs.append(scale)
+    augs.append(orientation)
 
     augs_list = T.AugmentationList(augs=augs)
 
@@ -909,6 +928,16 @@ def test(args) -> None:
 
     output_image = input_augs.image
 
+    input_coords = np.asarray([[10, 20], [4000, 4000]])
+
+    output_coords = transforms.apply_coords(input_coords)
+
+    for coord in input_coords:
+        image = cv2.circle(image.copy(), coord, 10, (255, 0, 0), -1)
+
+    for coord in output_coords:
+        output_image = cv2.circle(output_image.copy(), coord, 10, (255, 0, 0), -1)
+
     im = Image.fromarray(image)
     im.show("Original")
 

diff --git a/datasets/transforms.py b/datasets/transforms.py
@@ -1,10 +1,13 @@
 # Modified from P2PaLA
 
 import argparse
+import sys
+from pathlib import Path
 
 import cv2
 import detectron2.data.transforms as T
 import numpy as np
+from fvcore.transforms.transform import Transform
 from scipy.ndimage import affine_transform, gaussian_filter, map_coordinates
 
 # REVIEW Check if there is a benefit for using scipy instead of the standard torchvision
@@ -389,6 +392,7 @@ def __init__(self, image_format: str = "RGB") -> None:
         """
         super().__init__()
 
+        # Previously used to get the grayscale value
         self.rgb_weights = np.asarray([0.299, 0.587, 0.114]).astype(np.float32)
 
         self.image_format = image_format
@@ -566,6 +570,51 @@ def inverse(self) -> T.Transform:
         raise NotImplementedError
 
 
+class OrientationTransform(T.Transform):
+    """
+    Rotate an image and its coordinates by a multiple of 90 degrees clockwise
+    """
+
+    def __init__(self, times_90_degrees: int, height: int, width: int) -> None:
+        """
+        Rotate an image and its coordinates by a multiple of 90 degrees clockwise
+
+        Args:
+            times_90_degrees (int): number of clockwise 90 degree turns, taken modulo 4
+            height (int): height of the image before the rotation
+            width (int): width of the image before the rotation
+        """
+        super().__init__()
+        self.times_90_degrees = times_90_degrees % 4
+        self.height = height
+        self.width = width
+
+    def apply_image(self, img: np.ndarray) -> np.ndarray:
+        """
+        Rotate the image by the configured number of 90 degree turns
+
+        Args:
+            img (np.ndarray): image to rotate
+
+        Raises:
+            ValueError: if the number of turns is not in the range 0-3
+
+        Returns:
+            np.ndarray: the rotated image (the input itself for 0 turns)
+        """
+        if self.times_90_degrees == 0:
+            return img
+        elif self.times_90_degrees == 1:
+            return cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
+        elif self.times_90_degrees == 2:
+            return cv2.rotate(img, cv2.ROTATE_180)
+        elif self.times_90_degrees == 3:
+            return cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
+        else:
+            raise ValueError("Times 90 degrees should be between 0 and 3")
+
+    def apply_coords(self, coords: np.ndarray) -> np.ndarray:
+        """
+        Rotate coordinates in the same way as the image
+
+        Args:
+            coords (np.ndarray): array of shape (N, 2) with (x, y) in the first and second column
+
+        Raises:
+            ValueError: if the number of turns is not in the range 0-3
+
+        Returns:
+            np.ndarray: the rotated coordinates (the input itself for 0 turns)
+        """
+        if self.times_90_degrees == 0:
+            return coords
+        elif self.times_90_degrees == 1:
+            # Clockwise: (x, y) -> (height - y, x)
+            new_coords = coords.copy()
+            new_coords[:, 0], new_coords[:, 1] = self.height - coords[:, 1], coords[:, 0]
+            return new_coords
+        elif self.times_90_degrees == 2:
+            # Half turn: both axes are mirrored, but x stays x and y stays y
+            new_coords = coords.copy()
+            new_coords[:, 0], new_coords[:, 1] = self.width - coords[:, 0], self.height - coords[:, 1]
+            return new_coords
+        elif self.times_90_degrees == 3:
+            # Counterclockwise: (x, y) -> (y, width - x)
+            new_coords = coords.copy()
+            new_coords[:, 0], new_coords[:, 1] = coords[:, 1], self.width - coords[:, 0]
+            return new_coords
+        else:
+            raise ValueError("Times 90 degrees should be between 0 and 3")
+
+    def inverse(self) -> Transform:
+        """
+        Build the transform that undoes this rotation
+
+        Returns:
+            Transform: OrientationTransform with the remaining turns to a full circle
+        """
+        if self.times_90_degrees % 2 == 0:
+            height, width = self.height, self.width
+        else:
+            # After an odd number of turns the rotated image has its height and width swapped
+            height, width = self.width, self.height
+        return OrientationTransform(4 - self.times_90_degrees, height, width)
+
+
 def get_arguments() -> argparse.Namespace:
     parser = argparse.ArgumentParser(description="Testing the image augmentation and transformations")
     io_args = parser.add_argument_group("IO")
@@ -590,7 +639,7 @@ def test(args) -> None:
     image = cv2.imread(str(input_path))[..., ::-1]
     print(image.dtype)
 
-    affine = AffineTransform(np.eye(3))
+    affine = RandomRotation().get_transform(image)
     output_image = affine.apply_image(image)
 
     im = Image.fromarray(image)