From be859f80d80aa0877c6a463ad634dede7fdabc20 Mon Sep 17 00:00:00 2001 From: Stefan Klut Date: Thu, 30 Nov 2023 18:08:51 +0100 Subject: [PATCH] Working orientation change, next add config --- datasets/augmentations.py | 45 ++++++++++++++++++++++++++++------ datasets/transforms.py | 51 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 9 deletions(-) diff --git a/datasets/augmentations.py b/datasets/augmentations.py index c6050fe..2cd2f27 100644 --- a/datasets/augmentations.py +++ b/datasets/augmentations.py @@ -10,6 +10,7 @@ import detectron2.data.transforms as T import numpy as np from detectron2.config import CfgNode +from fvcore.transforms.transform import Transform sys.path.append(str(Path(__file__).resolve().parent.joinpath(".."))) from scipy.ndimage import gaussian_filter @@ -20,6 +21,7 @@ GaussianFilterTransform, GrayscaleTransform, HFlipTransform, + OrientationTransform, ResizeTransform, VFlipTransform, WarpFieldTransform, @@ -313,7 +315,7 @@ def __init__( r_kappa: float = 30, sh_kappa: float = 20, sc_stdv: float = 0.12, - probs: Optional[Sequence[float]] = None, + probabilities: Optional[Sequence[float]] = None, ) -> None: """ Apply a random affine transformation to the image @@ -323,7 +325,7 @@ def __init__( r_kappa (float, optional): kappa value used for sampling the rotation. Defaults to 30. sh_kappa (float, optional): kappa value used for sampling the shear.. Defaults to 20. sc_stdv (float, optional): standard deviation used for the scale. Defaults to 0.12. - probs (Optional[Sequence[float]], optional): individual probabilities for each sub category of an affine transformation. When None is given default to all 1.0 Defaults to None. + probabilities (Optional[Sequence[float]], optional): individual probabilities for each sub category of an affine transformation. When None is given default to all 1.0 Defaults to None. 
class RandomOrientation(T.Augmentation):
    """
    Randomly rotate the image by a multiple of 90 degrees
    """

    def __init__(self, orientation_percentages: Optional[list[float | int]] = None) -> None:
        """
        Randomly rotate the image by a multiple of 90 degrees

        Args:
            orientation_percentages (Optional[list[float | int]], optional): relative weights for rotating
                0, 1, 2 or 3 times 90 degrees (in that order). The weights are normalized to sum to 1,
                so they do not need to be percentages. When None is given all four orientations are
                equally likely. Defaults to None.

        Raises:
            ValueError: if a weight is negative or not finite, or if all weights are zero
        """
        super().__init__()
        if orientation_percentages is None:
            orientation_percentages = [1.0] * 4
        array_percentages = np.asarray(orientation_percentages, dtype=float)
        assert len(array_percentages) == 4, f"{len(array_percentages)}: {array_percentages}"

        # Fail at construction time instead of storing NaN/invalid probabilities that would only
        # surface later as an error inside np.random.choice
        if not np.all(np.isfinite(array_percentages)) or np.any(array_percentages < 0):
            raise ValueError(f"Orientation percentages must be finite and non-negative: {array_percentages}")
        total = np.sum(array_percentages)
        if total <= 0:
            raise ValueError(f"At least one orientation percentage must be larger than 0: {array_percentages}")

        self.orientation_percentages = array_percentages / total

    def get_transform(self, image) -> Transform:
        """
        Sample the number of 90 degree rotations and build the matching transform

        Args:
            image: image to be rotated, its first two dimensions are used as height and width

        Returns:
            Transform: OrientationTransform for the sampled orientation
        """
        times_90_degrees = np.random.choice(4, p=self.orientation_percentages)
        height, width = image.shape[0], image.shape[1]
        return OrientationTransform(times_90_degrees, height, width)
r_kappa=cfg.INPUT.AFFINE.ROTATION.KAPPA, sh_kappa=cfg.INPUT.AFFINE.SHEAR.KAPPA, sc_stdv=cfg.INPUT.AFFINE.SCALE.STANDARD_DEVIATION, - probs=( + probabilities=( cfg.INPUT.AFFINE.TRANSLATION.PROBABILITY, cfg.INPUT.AFFINE.ROTATION.PROBABILITY, cfg.INPUT.AFFINE.SHEAR.PROBABILITY, @@ -883,13 +900,14 @@ def test(args) -> None: contrast = RandomContrast() brightness = RandomBrightness() saturation = RandomSaturation() + orientation = RandomOrientation(orientation_percentages=[0, 0, 0, 1]) augs = [] # augs = T.AugmentationList([resize, elastic, affine]) - augs.append(resize) - augs.append(elastic) + # augs.append(resize) + # augs.append(elastic) # augs.append(grayscale) # augs.append(contrast) # augs.append(brightness) @@ -900,6 +918,7 @@ def test(args) -> None: # augs.append(rotation) # augs.append(shear) # augs.append(scale) + augs.append(orientation) augs_list = T.AugmentationList(augs=augs) @@ -909,6 +928,16 @@ def test(args) -> None: output_image = input_augs.image + input_coords = np.asarray([[10, 20], [4000, 4000]]) + + output_coords = transforms.apply_coords(input_coords) + + for coord in input_coords: + image = cv2.circle(image.copy(), coord, 10, (255, 0, 0), -1) + + for coord in output_coords: + output_image = cv2.circle(output_image.copy(), coord, 10, (255, 0, 0), -1) + im = Image.fromarray(image) im.show("Original") diff --git a/datasets/transforms.py b/datasets/transforms.py index eaf04a6..6b244be 100644 --- a/datasets/transforms.py +++ b/datasets/transforms.py @@ -1,10 +1,13 @@ # Modified from P2PaLA import argparse +import sys +from pathlib import Path import cv2 import detectron2.data.transforms as T import numpy as np +from fvcore.transforms.transform import Transform from scipy.ndimage import affine_transform, gaussian_filter, map_coordinates # REVIEW Check if there is a benefit for using scipy instead of the standard torchvision @@ -389,6 +392,7 @@ def __init__(self, image_format: str = "RGB") -> None: """ super().__init__() + # Previously used to get the 
class OrientationTransform(T.Transform):
    """
    Rotate an image, and the coordinates that belong to it, by a multiple of 90 degrees
    """

    def __init__(self, times_90_degrees: int, height: int, width: int) -> None:
        """
        Rotate an image, and the coordinates that belong to it, by a multiple of 90 degrees

        Args:
            times_90_degrees (int): number of clockwise 90 degree rotations, taken modulo 4
            height (int): height of the image before rotating
            width (int): width of the image before rotating
        """
        super().__init__()
        self.times_90_degrees = times_90_degrees % 4
        self.height = height
        self.width = width

    def apply_image(self, img: np.ndarray) -> np.ndarray:
        """
        Rotate the image

        Args:
            img (np.ndarray): image array

        Raises:
            ValueError: if the stored number of rotations is not 0, 1, 2 or 3

        Returns:
            np.ndarray: rotated image (the input itself when no rotation is needed)
        """
        if self.times_90_degrees == 0:
            return img
        elif self.times_90_degrees == 1:
            return cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        elif self.times_90_degrees == 2:
            return cv2.rotate(img, cv2.ROTATE_180)
        elif self.times_90_degrees == 3:
            return cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
        else:
            raise ValueError("Times 90 degrees should be between 0 and 3")

    def apply_coords(self, coords: np.ndarray) -> np.ndarray:
        """
        Rotate coordinates so they keep pointing at the same location in the rotated image

        Args:
            coords (np.ndarray): array with x in column 0 and y in column 1 (fvcore Nx2 convention)

        Raises:
            ValueError: if the stored number of rotations is not 0, 1, 2 or 3

        Returns:
            np.ndarray: rotated coordinates (the input itself when no rotation is needed)
        """
        if self.times_90_degrees == 0:
            return coords
        if self.times_90_degrees not in (1, 2, 3):
            raise ValueError("Times 90 degrees should be between 0 and 3")

        # x and y are views on the input, the output is written to a separate copy,
        # so the two column assignments below can not overwrite each other's source
        x, y = coords[:, 0], coords[:, 1]
        new_coords = coords.copy()
        if self.times_90_degrees == 1:
            # Clockwise: (x, y) -> (height - y, x)
            new_coords[:, 0] = self.height - y
            new_coords[:, 1] = x
        elif self.times_90_degrees == 2:
            # Upside down: (x, y) -> (width - x, height - y), each axis is mirrored onto itself
            new_coords[:, 0] = self.width - x
            new_coords[:, 1] = self.height - y
        else:
            # Counterclockwise: (x, y) -> (y, width - x)
            new_coords[:, 0] = y
            new_coords[:, 1] = self.width - x
        return new_coords

    def inverse(self) -> Transform:
        """
        Build the transform that undoes this rotation

        Returns:
            Transform: rotation back to the original orientation
        """
        # The inverse is applied to the already rotated image, for an odd number of
        # 90 degree rotations that image has its height and width swapped
        if self.times_90_degrees % 2 == 0:
            height, width = self.height, self.width
        else:
            height, width = self.width, self.height
        return OrientationTransform(4 - self.times_90_degrees, height, width)
def test(args) -> None: image = cv2.imread(str(input_path))[..., ::-1] print(image.dtype) - affine = AffineTransform(np.eye(3)) + affine = RandomRotation().get_transform(image) output_image = affine.apply_image(image) im = Image.fromarray(image)