This repository has been archived by the owner on May 30, 2023. It is now read-only.

✨ Feat: Preprocessing and augmentations with parameters #25

Merged

Changes from all commits
47 commits
28afd36
Update
AI-Daita Apr 21, 2022
f185d00
Accept parameters when run augmentations
AI-Daita Apr 24, 2022
385b67b
Fix a bug when parameters are empty
AI-Daita Apr 25, 2022
52cfcb0
Increase number of replicas when deploy
AI-Daita Apr 25, 2022
96f6bb4
Accept parameters when deploy augmentations
AI-Daita Apr 25, 2022
fddb5cb
Remove healtcheck function when deploy
AI-Daita Apr 25, 2022
e2b4cfc
Fix wrong dimension in NormalizeBrightness
AI-Daita Apr 25, 2022
75412bb
Fix wrong dimension in NormalizeSaturation
AI-Daita Apr 25, 2022
78b66b9
Fix wrong dimension in NormalizeSaturation
AI-Daita Apr 25, 2022
e382edd
Fix wrong dimension
AI-Daita Apr 25, 2022
7b1f92b
Add functions to find reference images based on preprocessing method
AI-Daita Apr 25, 2022
9939c33
Update preprocessing with new reference images
AI-Daita Apr 25, 2022
0260e15
Reduce number of replicas when deploy
AI-Daita Apr 28, 2022
89c412d
Run preprocessing with single process
AI-Daita May 1, 2022
d3ff503
Add debug statement in preprocessing
AI-Daita May 1, 2022
121cb41
Add debug statement in augmentation
AI-Daita May 1, 2022
cdcf202
Deploy 1 preprocessing model per core instead of using multi-processing
AI-Daita May 5, 2022
5e265c7
Split augmentation deployment into separate endpoint
AI-Daita May 5, 2022
78d9f4f
Change replicas of preprocessing and augmentation
AI-Daita May 5, 2022
f018709
Split augmentation deployment into separate endpoint
AI-Daita May 6, 2022
95095a4
Add debug statement in augmentation
AI-Daita May 6, 2022
e9f8a71
Add debug statement in augmentation and preprocessing
AI-Daita May 6, 2022
825ffa7
Add grayscale and high_resolution back to registry
AI-Daita May 12, 2022
3bbfd6a
Fix a bug when calling grayscale in preprocessing
AI-Daita May 12, 2022
721df92
Only keep extra dependencies in requirements-dev
AI-Daita May 14, 2022
369ca06
Add python-dotenv to read secret
AI-Daita May 14, 2022
bac0e67
Read aws secret from .env
AI-Daita May 14, 2022
0eae052
Read aws secret from .env
AI-Daita May 14, 2022
dceb916
Specify absolute .env file and region name
AI-Daita May 15, 2022
674a57b
Specify absolute .env file and region name
AI-Daita May 15, 2022
9200c37
Remove grayscale when run preprocessing in auto mode
AI-Daita May 16, 2022
efd20c0
Fix a bug in random_translate
AI-Daita May 19, 2022
826ebdb
Fix a bug when calling augmentation with parameters
AI-Daita May 19, 2022
5b449e8
Change default value
AI-Daita May 19, 2022
ff385ea
♻️ formatting
pcaversaccio May 20, 2022
cc8f337
Merge pull request #21 from daita-technologies/feat/run-preprocessing…
pcaversaccio May 20, 2022
43153a5
requirements-dev
pcaversaccio May 20, 2022
d7064d7
Resolve conflicts
AI-Daita May 22, 2022
6e55669
Add debug statements in augmentation
AI-Daita May 22, 2022
e9ba3ec
Skip running augmentations for un-supported files
AI-Daita May 22, 2022
7686e54
Skip running preprocessing for un-supported files
AI-Daita May 22, 2022
77e0f84
Add function to find reference image for high resolution
AI-Daita May 22, 2022
e586760
Return image with max height or width if there are no desired aspect …
AI-Daita May 22, 2022
43cecf1
🧹 fix formatting
pcaversaccio May 23, 2022
76ad7e9
removing Any
pcaversaccio May 23, 2022
a69d0a7
removing Counter
pcaversaccio May 23, 2022
cb0bce9
remove kornia from preprocessing_utils.py
pcaversaccio May 23, 2022
273 changes: 217 additions & 56 deletions augmentation/augmentations_list.py

Large diffs are not rendered by default.

124 changes: 87 additions & 37 deletions augmentation/augmentor.py
@@ -6,17 +6,19 @@
import json
import uuid
from pprint import pformat
import traceback
import random
import os
from typing import List, Optional, Tuple
from copy import deepcopy
from typing import Any, Dict, List, Optional, Tuple

import augmentation.augmentations_list # Import to register all augmentations
from augmentation.registry import AUGMENTATIONS, CodeToAugment
from utils import image_to_tensor, read_image, save_image, tensor_to_image


class Augmentor:
SUPPORTED_EXTENSIONS: Tuple = (".png", ".jpg", ".jpeg")

def __init__(self, use_gpu: bool = False):
"""
Apply random augmentations on batch of images.
@@ -34,8 +36,8 @@ def process(
input_image_paths: List[str],
augment_codes: List[str],
num_augments_per_image: int,
parameters: Dict[str, Dict[str, Any]],
output_dir: str,
**kwargs,
) -> Tuple[List[str], List[str]]:
"""
Apply augmentation on a list of images.
@@ -102,38 +104,59 @@ def process(
]
```
"""
print("*" * 100)

pid = os.getpid()
print(
f"[AUGMENTATION][pid {pid}] Found {len(input_image_paths)} images: {input_image_paths}"
)

# Skip running for un-supported extensions
for image_path in deepcopy(input_image_paths):
_, extension = os.path.splitext(image_path)
if extension.lower() not in Augmentor.SUPPORTED_EXTENSIONS:
print(
f"[AUGMENTATION][pid {pid}] [WARNING] Only support these extensions: {Augmentor.SUPPORTED_EXTENSIONS}. "
f"But got {extension=} in image {image_path}."
"Skip this image."
)
input_image_paths.remove(image_path)

start_augmenting = time.time()
if len(augment_codes) > 0:
self.__check_valid_augment_codes(augment_codes)
else:
augment_codes: List[str] = list(CodeToAugment.keys())
print(
f"[AUGMENTATION][pid {pid}] "
f"{ {augment_code: CodeToAugment[augment_code] for augment_code in augment_codes} }"
)

augment_code: str = random.choice(augment_codes)
augment_name: str = CodeToAugment[augment_code]
print(f"{augment_code}: {augment_name}")

print(f"Found {len(input_image_paths)} images.")
output_image_paths: List[str] = []
output_json_paths: List[str] = []
try:
output_image_paths, output_json_paths = self.__process_batch(
input_image_paths,
augment_name,
num_augments_per_image,
output_dir,
**kwargs,
)
except Exception:
print(f"Error: {traceback.format_exc()}")

output_image_paths, output_json_paths = self.__process_batch(
input_image_paths,
augment_name,
num_augments_per_image,
parameters.get(augment_code, {}),
output_dir,
)

end_augmenting = time.time()
print(
f"[AUGMENTATION][pid {pid}] Done augmenting {len(input_image_paths)} images: "
f"{round(end_augmenting - start_augmenting, 4)} seconds"
)
return output_image_paths, output_json_paths

def __process_batch(
self,
image_paths: List[str],
augment_name: str,
num_augments_per_image: int,
parameters: Dict[str, Any],
output_dir: str,
**kwargs,
) -> Tuple[List[str], List[str]]:
"""
Generate list of augmented images from an image path.
@@ -180,25 +203,30 @@ def __process_batch(
]
```
"""
pid = os.getpid()

original_sizes: List[
Tuple[int, int]
] = []  # original heights and widths of images
images_tensor: List[torch.Tensor] = []
for image_path in image_paths:
start = time.time()
print(f"[AUGMENTATION][pid {pid}] {image_path} | ", end="")
start_read = time.time()
image: np.ndarray = read_image(image_path)
end = time.time()
print(f"Read image {image_path}: {round(end - start, 2)} seconds")
end_read = time.time()
print(f"Read image: {round(end_read - start_read, 2)} seconds | ", end="")

# Resize tensor images for faster processing
image_tensor: torch.Tensor = image_to_tensor(image).to(self.device)
original_sizes.append(image_tensor.shape[-2:])
start = time.time()

start_resize = time.time()
image_tensor: torch.Tensor = K.geometry.resize(
image_tensor, size=(1024, 1024)
)
end = time.time()
print(f"Resize image: {round(end - start, 2)} seconds")
end_resize = time.time()
print(f"Resize image: {round(end_resize - start_resize, 2)} seconds")

images_tensor.append(image_tensor)

# Stack multiple same images to form a batch
@@ -208,22 +236,31 @@
output_image_paths: List[str] = []
output_json_paths: List[str] = []

# Augment batch
print(
f"[AUGMENTATION][pid {pid}] Augmenting batch of {len(images_tensor)} images: {parameters=}"
)
for _ in range(num_augments_per_image):
# Augment a batch of images
start = time.time()
images_tensor_out = AUGMENTATIONS[augment_name](images_tensor)
images_tensor_out = AUGMENTATIONS[augment_name](
images_tensor, parameters=parameters
)
end = time.time()
print(
f"Generated {len(images_tensor)} images: {round(end - start, 2)} seconds"
f"[AUGMENTATION][pid {pid}] "
f"{augment_name=}: {round(end - start, 2)} seconds"
)

# Save generated images
for image_path, image_tensor, original_size in zip(
image_paths, images_tensor_out, original_sizes
):
# Resize back to original size
height, width = original_size
image_tensor = K.geometry.resize(image_tensor, size=(height, width))
print(f"[AUGMENTATION][pid {pid}] {image_path} | ", end="")

# Resize images back to original size, EXCEPT for super_resolution
if augment_name != "super_resolution":
height, width = original_size
image_tensor = K.geometry.resize(image_tensor, size=(height, width))
image: np.ndarray = tensor_to_image(image_tensor)

name_without_ext, ext = os.path.splitext(os.path.basename(image_path))
@@ -245,15 +282,23 @@

assert len(output_image_paths) == len(output_json_paths)

print("*" * 100)
return output_image_paths, output_json_paths

def __check_valid_augment_codes(self, augment_codes: List[str]) -> Optional[bool]:
pid = os.getpid()

# Map from an augment code to its augment name
supported_augment_codes: Dict[str, str] = {
augment_code: augment_name
for augment_code, augment_name in CodeToAugment.items()
if augment_name in AUGMENTATIONS.keys()
}

for augment_code in augment_codes:
augment_name: str = CodeToAugment[augment_code]
if augment_name not in AUGMENTATIONS.keys():
if augment_code not in supported_augment_codes.keys():
message: str = (
f"Only support these of augmentations: {pformat(CodeToAugment)}. "
f"[AUGMENTATION][pid {pid}] "
f"Only support these of augmentations: {pformat(supported_augment_codes)}. "
f"Got {augment_code=}!"
)
print(message)
@@ -274,10 +319,15 @@ def __init_device(self, use_gpu: bool) -> torch.device:
-------
"cpu" or "cuda" device
"""
pid = os.getpid()
if use_gpu and torch.cuda.is_available():
device: torch.device = torch.device("cuda:0")
print(f"{use_gpu=} and cuda is available. Initialized {device}")
print(
f"[AUGMENTATION][pid {pid}] {use_gpu=} and cuda is available. Initialized {device}"
)
else:
device = torch.device("cpu")
print(f"{use_gpu=} and cuda not found. Initialized {device}")
print(
f"[AUGMENTATION][pid {pid}] {use_gpu=} and cuda not found. Initialized {device}"
)
return device
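
For orientation, here is a minimal sketch of how the reworked `Augmentor.process` signature is invoked after this change. The image paths, the augment code `AUG-001`, and its `magnitude` parameter are hypothetical placeholders; real codes come from `CodeToAugment` in `augmentation/registry.py`.

```python
from augmentation.augmentor import Augmentor

augmentor = Augmentor(use_gpu=False)

# Hypothetical paths, code, and parameter values -- real augment codes
# are defined in CodeToAugment inside augmentation/registry.py.
output_image_paths, output_json_paths = augmentor.process(
    input_image_paths=["/data/images/cat.jpg", "/data/images/dog.png"],
    augment_codes=["AUG-001"],  # empty list => choose from all registered codes
    num_augments_per_image=2,
    parameters={"AUG-001": {"magnitude": 0.5}},  # per-code keyword arguments
    output_dir="/data/output",
)
```

Note that `process` draws one code at random from `augment_codes` per call and looks up that code's entry in `parameters`, falling back to an empty dict.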
58 changes: 21 additions & 37 deletions augmentation/deploy.py
@@ -1,34 +1,21 @@
import ray
from ray import serve
from starlette.requests import Request
from starlette.responses import Response
from starlette.responses import JSONResponse

import logging
import sys
import traceback
import multiprocessing as mp
from typing import List, Dict
from typing import Any, List, Dict

from augmentation.augmentor import Augmentor
from utils import get_current_time


CURRENT_TIME: str = get_current_time()
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(name)s %(levelname)s: %(message)s",
datefmt="%y-%b-%d %H:%M:%S",
handlers=[
logging.StreamHandler(sys.stdout),
logging.FileHandler(
f"logs/augmentaions_{CURRENT_TIME}.txt", mode="w", encoding="utf-8"
),
],
)
logger = logging.getLogger(__file__)


class AugmentorDeployment:
@serve.deployment(
route_prefix="/augmentation",
num_replicas=1,
max_concurrent_queries=100,
ray_actor_options={"num_cpus": 1, "num_gpus": 0},
)
class AugmentationDeployment:
def __init__(self, use_gpu: bool = False):
"""
Deploy and apply random augmentations on batch of images with Ray Serve.
@@ -43,7 +30,7 @@ def __init__(self, use_gpu: bool = False):

async def __call__(self, request: Request) -> List[Dict[str, object]]:
"""
Wrapper of `Augmentor.process` when called with an HTTP request.
Wrapper of `Augmentor.process` and `Preprocessor.process` when called with an HTTP request.

Parameters:
----------
@@ -115,15 +102,21 @@ async def __call__(self, request: Request) -> List[Dict[str, object]]:
try:
input_image_paths: str = data["images_paths"]
output_dir: str = data["output_folder"]
augment_code: str = data["augment_code"]
num_augments_per_image: int = data["num_augments_per_image"]
augment_codes: List[str] = data["codes"]
num_augments_per_image: int = data.get("num_augments_per_image", 1)
parameters: Dict[str, Dict[str, Any]] = data.get("parameters", {})

output_image_paths, output_json_paths = self.augmentor.process(
input_image_paths, augment_code, num_augments_per_image, output_dir
input_image_paths,
augment_codes,
num_augments_per_image,
parameters,
output_dir,
)
return {"images_paths": output_image_paths, "json_paths": output_json_paths}

except Exception:
return Response(status_code=500, content=traceback.format_exc())
return JSONResponse(status_code=500, content=traceback.format_exc())


if __name__ == "__main__":
@@ -132,13 +125,4 @@ async def __call__(self, request: Request) -> List[Dict[str, object]]:
serve.start(detached=True, http_options={"host": "0.0.0.0", "port": 8000})

# Deploy
num_cpus: int = mp.cpu_count()
serve.deployment(AugmentorDeployment).options(
route_prefix="/augmentation",
num_replicas=2,
max_concurrent_queries=32,
ray_actor_options={"num_cpus": num_cpus, "num_gpus": 0},
init_kwargs={
"use_gpu": False,
},
).deploy()
AugmentationDeployment.deploy(use_gpu=False)
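
Assuming the Ray Serve instance above is running locally on port 8000, a request against the `/augmentation` route might look as follows. The payload field names mirror what `__call__` reads from the request body; the concrete paths and the augment code are illustrative only.

```python
import requests

payload = {
    "images_paths": ["/data/images/cat.jpg"],
    "output_folder": "/data/output",
    "codes": ["AUG-001"],  # empty list lets the augmentor choose randomly
    "num_augments_per_image": 1,  # optional, defaults to 1
    "parameters": {"AUG-001": {"magnitude": 0.5}},  # optional, defaults to {}
}

response = requests.post("http://localhost:8000/augmentation", json=payload)
response.raise_for_status()  # a 500 carries the server-side traceback as JSON
print(response.json())  # {"images_paths": [...], "json_paths": [...]}
```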
12 changes: 8 additions & 4 deletions augmentation/registry.py
@@ -25,8 +25,12 @@


 def register_augmentation(name: str):
-    def wrapper(augmentation_class):
-        AUGMENTATIONS[name] = augmentation_class
-        return augmentation_class
+    def decorator(augmentation_function):
+        AUGMENTATIONS[name] = augmentation_function

-    return wrapper
+        def wrapper(*args, **kwargs):
+            return augmentation_function(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
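
With the registry now storing plain functions instead of classes, a new augmentation can be registered as sketched below. The `identity` augmentation is a made-up example for illustration; registered callables are expected to accept a batched image tensor plus a `parameters` keyword, matching how `Augmentor.__process_batch` invokes them.

```python
from typing import Any, Dict

import torch

from augmentation.registry import AUGMENTATIONS, register_augmentation


# Hypothetical augmentation, not part of the repo's registry.
@register_augmentation(name="identity")
def identity(images: torch.Tensor, parameters: Dict[str, Any]) -> torch.Tensor:
    # No-op augmentation: returns the batch unchanged.
    return images


batch = torch.rand(2, 3, 64, 64)  # (batch, channels, height, width)
out = AUGMENTATIONS["identity"](batch, parameters={})
```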