This repository has been archived by the owner on May 30, 2023. It is now read-only.

✨ Feat: Preprocessing and augmentations with parameters #25

Merged

Changes from all commits
47 commits
28afd36
Update
AI-Daita Apr 21, 2022
f185d00
Accept parameters when run augmentations
AI-Daita Apr 24, 2022
385b67b
Fix a bug when parameters are empty
AI-Daita Apr 25, 2022
52cfcb0
Increase number of replicas when deploy
AI-Daita Apr 25, 2022
96f6bb4
Accept parameters when deploy augmentations
AI-Daita Apr 25, 2022
fddb5cb
Remove healtcheck function when deploy
AI-Daita Apr 25, 2022
e2b4cfc
Fix wrong dimension in NormalizeBrightness
AI-Daita Apr 25, 2022
75412bb
Fix wrong dimension in NormalizeSaturation
AI-Daita Apr 25, 2022
78b66b9
Fix wrong dimension in NormalizeSaturation
AI-Daita Apr 25, 2022
e382edd
Fix wrong dimension
AI-Daita Apr 25, 2022
7b1f92b
Add functions to find reference images based on preprocessing method
AI-Daita Apr 25, 2022
9939c33
Update preprocessing with new reference images
AI-Daita Apr 25, 2022
0260e15
Reduce number of replicas when deploy
AI-Daita Apr 28, 2022
89c412d
Run preprocessing with single process
AI-Daita May 1, 2022
d3ff503
Add debug statement in preprocessing
AI-Daita May 1, 2022
121cb41
Add debug statement in augmentation
AI-Daita May 1, 2022
cdcf202
Deploy 1 preprocessing model per core instead of using multi-processing
AI-Daita May 5, 2022
5e265c7
Split augmentation deployment into separate endpoint
AI-Daita May 5, 2022
78d9f4f
Change replicas of preprocessing and augmentation
AI-Daita May 5, 2022
f018709
Split augmentation deployment into separate endpoint
AI-Daita May 6, 2022
95095a4
Add debug statement in augmentation
AI-Daita May 6, 2022
e9f8a71
Add debug statement in augmentation and preprocessing
AI-Daita May 6, 2022
825ffa7
Add grayscale and high_resolution back to registry
AI-Daita May 12, 2022
3bbfd6a
Fix a bug when calling grayscale in preprocessing
AI-Daita May 12, 2022
721df92
Only keep extra dependencies in requirements-dev
AI-Daita May 14, 2022
369ca06
Add python-dotenv to read secret
AI-Daita May 14, 2022
bac0e67
Read aws secret from .env
AI-Daita May 14, 2022
0eae052
Read aws secret from .env
AI-Daita May 14, 2022
dceb916
Specify absolute .env file and region name
AI-Daita May 15, 2022
674a57b
Specify absolute .env file and region name
AI-Daita May 15, 2022
9200c37
Remove grayscale when run preprocessing in auto mode
AI-Daita May 16, 2022
efd20c0
Fix a bug in random_translate
AI-Daita May 19, 2022
826ebdb
Fix a bug when calling augmentation with parameters
AI-Daita May 19, 2022
5b449e8
Change default value
AI-Daita May 19, 2022
ff385ea
♻️ formatting
pcaversaccio May 20, 2022
cc8f337
Merge pull request #21 from daita-technologies/feat/run-preprocessing…
pcaversaccio May 20, 2022
43153a5
requirements-dev
pcaversaccio May 20, 2022
d7064d7
Resolve conflicts
AI-Daita May 22, 2022
6e55669
Add debug statements in augmentation
AI-Daita May 22, 2022
e9ba3ec
Skip running augmentations for un-supported files
AI-Daita May 22, 2022
7686e54
Skip running preprocessing for un-supported files
AI-Daita May 22, 2022
77e0f84
Add function to find reference image for high resolution
AI-Daita May 22, 2022
e586760
Return image with max height or width if there are no desired aspect …
AI-Daita May 22, 2022
43cecf1
🧹 fix formatting
pcaversaccio May 23, 2022
76ad7e9
removing Any
pcaversaccio May 23, 2022
a69d0a7
removing Counter
pcaversaccio May 23, 2022
cb0bce9
remove kornia from preprocessing_utils.py
pcaversaccio May 23, 2022
273 changes: 217 additions & 56 deletions augmentation/augmentations_list.py

Large diffs are not rendered by default.

124 changes: 87 additions & 37 deletions augmentation/augmentor.py
@@ -6,17 +6,19 @@
import json
import uuid
from pprint import pformat
import traceback
import random
import os
from typing import List, Optional, Tuple
from copy import deepcopy
from typing import Any, Dict, List, Optional, Tuple

import augmentation.augmentations_list # Import to register all augmentations
from augmentation.registry import AUGMENTATIONS, CodeToAugment
from utils import image_to_tensor, read_image, save_image, tensor_to_image


class Augmentor:
SUPPORTED_EXTENSIONS: Tuple = (".png", ".jpg", ".jpeg")

def __init__(self, use_gpu: bool = False):
"""
Apply random augmentations on batch of images.
@@ -34,8 +36,8 @@ def process(
input_image_paths: List[str],
augment_codes: List[str],
num_augments_per_image: int,
parameters: Dict[str, Dict[str, Any]],
output_dir: str,
**kwargs,
) -> Tuple[List[str], List[str]]:
"""
Apply augmentation on a list of images.
@@ -102,38 +104,59 @@ def process(
]
```
"""
print("*" * 100)

pid = os.getpid()
print(
f"[AUGMENTATION][pid {pid}] Found {len(input_image_paths)} images: {input_image_paths}"
)

# Skip running for un-supported extensions
for image_path in deepcopy(input_image_paths):
_, extension = os.path.splitext(image_path)
if extension.lower() not in Augmentor.SUPPORTED_EXTENSIONS:
print(
f"[AUGMENTATION][pid {pid}] [WARNING] Only support these extensions: {Augmentor.SUPPORTED_EXTENSIONS}. "
f"But got {extension=} in image {image_path}."
"Skip this image."
)
input_image_paths.remove(image_path)

start_augmenting = time.time()
if len(augment_codes) > 0:
self.__check_valid_augment_codes(augment_codes)
else:
augment_codes: List[str] = list(CodeToAugment.keys())
print(
f"[AUGMENTATION][pid {pid}] "
f"{ {augment_code: CodeToAugment[augment_code] for augment_code in augment_codes} }"
)

augment_code: str = random.choice(augment_codes)
augment_name: str = CodeToAugment[augment_code]
print(f"{augment_code}: {augment_name}")

print(f"Found {len(input_image_paths)} images.")
output_image_paths: List[str] = []
output_json_paths: List[str] = []
try:
output_image_paths, output_json_paths = self.__process_batch(
input_image_paths,
augment_name,
num_augments_per_image,
output_dir,
**kwargs,
)
except Exception:
print(f"Error: {traceback.format_exc()}")

output_image_paths, output_json_paths = self.__process_batch(
input_image_paths,
augment_name,
num_augments_per_image,
parameters.get(augment_code, {}),
output_dir,
)

end_augmenting = time.time()
print(
f"[AUGMENTATION][pid {pid}] Done augmenting {len(input_image_paths)} images: "
f"{round(end_augmenting - start_augmenting, 4)} seconds"
)
return output_image_paths, output_json_paths

def __process_batch(
self,
image_paths: List[str],
augment_name: str,
num_augments_per_image: int,
parameters: Dict[str, Any],
output_dir: str,
**kwargs,
) -> Tuple[List[str], List[str]]:
"""
Generate list of augmented images from an image path.
@@ -180,25 +203,30 @@ def __process_batch(
]
```
"""
pid = os.getpid()

original_sizes: List[
Tuple[int, int]
] = []  # original heights and widths of images
images_tensor: List[torch.Tensor] = []
for image_path in image_paths:
start = time.time()
print(f"[AUGMENTATION][pid {pid}] {image_path} | ", end="")
start_read = time.time()
image: np.ndarray = read_image(image_path)
end = time.time()
print(f"Read image {image_path}: {round(end - start, 2)} seconds")
end_read = time.time()
print(f"Read image: {round(end_read - start_read, 2)} seconds | ", end="")

# Resize tensor images for faster processing
image_tensor: torch.Tensor = image_to_tensor(image).to(self.device)
original_sizes.append(image_tensor.shape[-2:])
start = time.time()

start_resize = time.time()
image_tensor: torch.Tensor = K.geometry.resize(
image_tensor, size=(1024, 1024)
)
end = time.time()
print(f"Resize image: {round(end - start, 2)} seconds")
end_resize = time.time()
print(f"Resize image: {round(end_resize - start_resize, 2)} seconds")

images_tensor.append(image_tensor)

# Stack multiple same images to form a batch
@@ -208,22 +236,31 @@
output_image_paths: List[str] = []
output_json_paths: List[str] = []

# Augment batch
print(
f"[AUGMENTATION][pid {pid}] Augmenting batch of {len(images_tensor)} images: {parameters=}"
)
for _ in range(num_augments_per_image):
# Augment a batch of images
start = time.time()
images_tensor_out = AUGMENTATIONS[augment_name](images_tensor)
images_tensor_out = AUGMENTATIONS[augment_name](
images_tensor, parameters=parameters
)
end = time.time()
print(
f"Generated {len(images_tensor)} images: {round(end - start, 2)} seconds"
f"[AUGMENTATION][pid {pid}] "
f"{augment_name=}: {round(end - start, 2)} seconds"
)

# Save generated images
for image_path, image_tensor, original_size in zip(
image_paths, images_tensor_out, original_sizes
):
# Resize back to original size
height, width = original_size
image_tensor = K.geometry.resize(image_tensor, size=(height, width))
print(f"[AUGMENTATION][pid {pid}] {image_path} | ", end="")

# Resize images back to original size, EXCEPT for super_resolution
if augment_name != "super_resolution":
height, width = original_size
image_tensor = K.geometry.resize(image_tensor, size=(height, width))
image: np.ndarray = tensor_to_image(image_tensor)

name_without_ext, ext = os.path.splitext(os.path.basename(image_path))
@@ -245,15 +282,23 @@

assert len(output_image_paths) == len(output_json_paths)

print("*" * 100)
return output_image_paths, output_json_paths

def __check_valid_augment_codes(self, augment_codes: List[str]) -> Optional[bool]:
pid = os.getpid()

# Map from an augment code to its augment name
supported_augment_codes: Dict[str, str] = {
augment_code: augment_name
for augment_code, augment_name in CodeToAugment.items()
if augment_name in AUGMENTATIONS.keys()
}

for augment_code in augment_codes:
augment_name: str = CodeToAugment[augment_code]
if augment_name not in AUGMENTATIONS.keys():
if augment_code not in supported_augment_codes.keys():
message: str = (
f"Only support these of augmentations: {pformat(CodeToAugment)}. "
f"[AUGMENTATION][pid {pid}] "
f"Only support these of augmentations: {pformat(supported_augment_codes)}. "
f"Got {augment_code=}!"
)
print(message)
@@ -274,10 +319,15 @@ def __init_device(self, use_gpu: bool) -> torch.device:
-------
"cpu" or "cuda" device
"""
pid = os.getpid()
if use_gpu and torch.cuda.is_available():
device: torch.device = torch.device("cuda:0")
print(f"{use_gpu=} and cuda is available. Initialized {device}")
print(
f"[AUGMENTATION][pid {pid}] {use_gpu=} and cuda is available. Initialized {device}"
)
else:
device = torch.device("cpu")
print(f"{use_gpu=} and cuda not found. Initialized {device}")
print(
f"[AUGMENTATION][pid {pid}] {use_gpu=} and cuda not found. Initialized {device}"
)
return device
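
For orientation, here is a minimal sketch of how the reworked `Augmentor.process` signature is invoked after this change. The image paths, the augment code `AUG-001`, and its `magnitude` parameter are hypothetical placeholders; real codes come from `CodeToAugment` in `augmentation/registry.py`.

```python
from augmentation.augmentor import Augmentor

augmentor = Augmentor(use_gpu=False)

# Hypothetical paths, code, and parameter values -- real augment codes
# are defined in CodeToAugment inside augmentation/registry.py.
output_image_paths, output_json_paths = augmentor.process(
    input_image_paths=["/data/images/cat.jpg", "/data/images/dog.png"],
    augment_codes=["AUG-001"],  # empty list => choose from all registered codes
    num_augments_per_image=2,
    parameters={"AUG-001": {"magnitude": 0.5}},  # per-code keyword arguments
    output_dir="/data/output",
)
```

Note that `process` draws one code at random from `augment_codes` per call and looks up that code's entry in `parameters`, falling back to an empty dict.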
58 changes: 21 additions & 37 deletions augmentation/deploy.py
@@ -1,34 +1,21 @@
import ray
from ray import serve
from starlette.requests import Request
from starlette.responses import Response
from starlette.responses import JSONResponse

import logging
import sys
import traceback
import multiprocessing as mp
from typing import List, Dict
from typing import Any, List, Dict

from augmentation.augmentor import Augmentor
from utils import get_current_time


CURRENT_TIME: str = get_current_time()
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(name)s %(levelname)s: %(message)s",
datefmt="%y-%b-%d %H:%M:%S",
handlers=[
logging.StreamHandler(sys.stdout),
logging.FileHandler(
f"logs/augmentaions_{CURRENT_TIME}.txt", mode="w", encoding="utf-8"
),
],
)
logger = logging.getLogger(__file__)


class AugmentorDeployment:
@serve.deployment(
route_prefix="/augmentation",
num_replicas=1,
max_concurrent_queries=100,
ray_actor_options={"num_cpus": 1, "num_gpus": 0},
)
class AugmentationDeployment:
def __init__(self, use_gpu: bool = False):
"""
Deploy and apply random augmentations on batch of images with Ray Serve.
@@ -43,7 +30,7 @@ def __init__(self, use_gpu: bool = False):

async def __call__(self, request: Request) -> List[Dict[str, object]]:
"""
Wrapper of `Augmentor.process` when called with an HTTP request.
Wrapper of `Augmentor.process` and `Preprocessor.process` when called with an HTTP request.

Parameters:
----------
@@ -115,15 +102,21 @@ async def __call__(self, request: Request) -> List[Dict[str, object]]:
try:
input_image_paths: str = data["images_paths"]
output_dir: str = data["output_folder"]
augment_code: str = data["augment_code"]
num_augments_per_image: int = data["num_augments_per_image"]
augment_codes: List[str] = data["codes"]
num_augments_per_image: int = data.get("num_augments_per_image", 1)
parameters: Dict[str, Dict[str, Any]] = data.get("parameters", {})

output_image_paths, output_json_paths = self.augmentor.process(
input_image_paths, augment_code, num_augments_per_image, output_dir
input_image_paths,
augment_codes,
num_augments_per_image,
parameters,
output_dir,
)
return {"images_paths": output_image_paths, "json_paths": output_json_paths}

except Exception:
return Response(status_code=500, content=traceback.format_exc())
return JSONResponse(status_code=500, content=traceback.format_exc())


if __name__ == "__main__":
@@ -132,13 +125,4 @@ async def __call__(self, request: Request) -> List[Dict[str, object]]:
serve.start(detached=True, http_options={"host": "0.0.0.0", "port": 8000})

# Deploy
num_cpus: int = mp.cpu_count()
serve.deployment(AugmentorDeployment).options(
route_prefix="/augmentation",
num_replicas=2,
max_concurrent_queries=32,
ray_actor_options={"num_cpus": num_cpus, "num_gpus": 0},
init_kwargs={
"use_gpu": False,
},
).deploy()
AugmentationDeployment.deploy(use_gpu=False)
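
Assuming the Ray Serve instance above is running locally on port 8000, a request against the `/augmentation` route might look as follows. The payload field names mirror what `__call__` reads from the request body; the concrete paths and the augment code are illustrative only.

```python
import requests

payload = {
    "images_paths": ["/data/images/cat.jpg"],
    "output_folder": "/data/output",
    "codes": ["AUG-001"],  # empty list lets the augmentor choose randomly
    "num_augments_per_image": 1,  # optional, defaults to 1
    "parameters": {"AUG-001": {"magnitude": 0.5}},  # optional, defaults to {}
}

response = requests.post("http://localhost:8000/augmentation", json=payload)
response.raise_for_status()  # a 500 carries the server-side traceback as JSON
print(response.json())  # {"images_paths": [...], "json_paths": [...]}
```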
12 changes: 8 additions & 4 deletions augmentation/registry.py
@@ -25,8 +25,12 @@


 def register_augmentation(name: str):
-    def wrapper(augmentation_class):
-        AUGMENTATIONS[name] = augmentation_class
-        return augmentation_class
+    def decorator(augmentation_function):
+        AUGMENTATIONS[name] = augmentation_function

-    return wrapper
+        def wrapper(*args, **kwargs):
+            return augmentation_function(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
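
With the registry now storing plain functions instead of classes, a new augmentation can be registered as sketched below. The `identity` augmentation is a made-up example for illustration; registered callables are expected to accept a batched image tensor plus a `parameters` keyword, matching how `Augmentor.__process_batch` invokes them.

```python
from typing import Any, Dict

import torch

from augmentation.registry import AUGMENTATIONS, register_augmentation


# Hypothetical augmentation, not part of the repo's registry.
@register_augmentation(name="identity")
def identity(images: torch.Tensor, parameters: Dict[str, Any]) -> torch.Tensor:
    # No-op augmentation: returns the batch unchanged.
    return images


batch = torch.rand(2, 3, 64, 64)  # (batch, channels, height, width)
out = AUGMENTATIONS["identity"](batch, parameters={})
```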