Skip to content

Commit

Permalink
[WWB]: Added validation for Inpainting pipeline (#1451)
Browse files Browse the repository at this point in the history
Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
  • Loading branch information
AlexKoff88 and ilya-lavrenov authored Dec 30, 2024
1 parent 0c5f03b commit 3d6b1ad
Show file tree
Hide file tree
Showing 6 changed files with 238 additions and 17 deletions.
34 changes: 22 additions & 12 deletions tools/who_what_benchmark/tests/test_cli_image.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import itertools
import subprocess # nosec B404
import os
import shutil
Expand All @@ -9,6 +10,9 @@
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

MODEL_CACHE = tempfile.mkdtemp()
OV_IMAGE_MODELS = ["OpenVINO/stable-diffusion-v1-5-int8-ov"]


def run_wwb(args):
logger.info(" ".join(["TRANSFOREMRS_VERBOSITY=debug wwb"] + args))
Expand All @@ -17,6 +21,19 @@ def run_wwb(args):
return result


def setup_module():
    """Download every model in ``OV_IMAGE_MODELS`` into ``MODEL_CACHE``.

    Each model is fetched with ``huggingface-cli download`` into a directory
    named after the model id with ``/`` replaced by ``--``.

    Raises:
        AssertionError: if any download command exits with a non-zero code.
    """
    for model_id in OV_IMAGE_MODELS:
        model_path = os.path.join(MODEL_CACHE, model_id.replace("/", "--"))
        result = subprocess.run(
            ["huggingface-cli", "download", model_id, "--local-dir", model_path],
            capture_output=True,
            text=True,
        )
        # Fail right here with the tool's stderr instead of letting every
        # test that uses the model trip over a missing/partial download.
        assert result.returncode == 0, (
            f"Failed to download {model_id}: {result.stderr}"
        )


def teardown_module():
    """Delete the ``MODEL_CACHE`` directory once the module's tests are done."""
    logger.info("Remove models")
    # Best-effort cleanup, like the other rmtree calls in this file: a file
    # that cannot be removed must not turn a passing run into a failure.
    shutil.rmtree(MODEL_CACHE, ignore_errors=True)


@pytest.mark.parametrize(
("model_id", "model_type", "backend"),
[
Expand All @@ -25,6 +42,8 @@ def run_wwb(args):
("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "hf"),
("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "openvino"),
("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "text-to-image", "hf"),
("hf-internal-testing/tiny-stable-diffusion-torch", "image-inpainting", "hf"),
("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "image-inpainting", "hf"),
],
)
def test_image_model_types(model_id, model_type, backend):
Expand Down Expand Up @@ -68,21 +87,13 @@ def test_image_model_types(model_id, model_type, backend):

@pytest.mark.parametrize(
("model_id", "model_type"),
[
("OpenVINO/LCM_Dreamshaper_v7-int8-ov", "image-to-image"),
("OpenVINO/LCM_Dreamshaper_v7-int8-ov", "text-to-image"),
],
list(itertools.product(OV_IMAGE_MODELS,
["image-to-image", "text-to-image", "image-inpainting"])),
)
def test_image_model_genai(model_id, model_type):
with tempfile.TemporaryDirectory() as temp_dir:
GT_FILE = os.path.join(temp_dir, "gt.csv")
MODEL_PATH = os.path.join(temp_dir, model_id.replace("/", "--"))

result = subprocess.run(["huggingface-cli", "download",
model_id, "--local-dir",
MODEL_PATH],
capture_output=True, text=True)
assert result.returncode == 0
MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--"))

wwb_args = [
"--base-model",
Expand Down Expand Up @@ -169,7 +180,6 @@ def test_image_model_genai(model_id, model_type):

shutil.rmtree("reference", ignore_errors=True)
shutil.rmtree("target", ignore_errors=True)
shutil.rmtree(MODEL_PATH, ignore_errors=True)
shutil.rmtree(output_dir, ignore_errors=True)


Expand Down
4 changes: 3 additions & 1 deletion tools/who_what_benchmark/whowhatbench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from .text_evaluator import TextEvaluator as Evaluator
from .text2image_evaluator import Text2ImageEvaluator
from .visualtext_evaluator import VisualTextEvaluator
from .image2image import Image2ImageEvaluator
from .im2im_evaluator import Image2ImageEvaluator
from .inpaint_evaluator import InpaintingEvaluator


__all__ = [
Expand All @@ -13,5 +14,6 @@
"Text2ImageEvaluator",
"VisualTextEvaluator",
"Image2ImageEvaluator",
"InpaintingEvaluator",
"EVALUATOR_REGISTRY",
]
133 changes: 133 additions & 0 deletions tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import os
from typing import Any, Union

import datasets
import pandas as pd
from tqdm import tqdm
from transformers import set_seed
import torch
import openvino_genai

from .registry import register_evaluator
from .text2image_evaluator import Text2ImageEvaluator

from .whowhat_metrics import ImageSimilarity


def preprocess_fn(example):
    """Rename the fields of one dataset record to the evaluator's column names.

    Reads ``inpaint_caption``, ``coco_image`` and ``mask`` from ``example`` and
    returns them under ``prompts``, ``images`` and ``masks`` respectively.
    """
    column_map = (
        ("prompts", "inpaint_caption"),
        ("images", "coco_image"),
        ("masks", "mask"),
    )
    return {target: example[source] for target, source in column_map}


def prepare_default_data(num_samples=None):
    """Stream the default inpainting samples from the ``phiyodr/InpaintCOCO`` test split.

    Records with an empty ``inpaint_caption`` are dropped, the first
    ``num_samples`` remaining records are kept (10 when ``num_samples`` is
    ``None``), and each record is passed through ``preprocess_fn`` with the
    original columns removed.
    """
    dataset_name = "phiyodr/InpaintCOCO"
    sample_count = num_samples if num_samples is not None else 10
    set_seed(42)

    def has_caption(example):
        return example["inpaint_caption"] != ""

    stream = datasets.load_dataset(dataset_name, split="test", streaming=True)
    default_dataset = stream.filter(has_caption).take(sample_count)
    return default_dataset.map(
        preprocess_fn, remove_columns=default_dataset.column_names
    )


@register_evaluator("image-inpainting")
class InpaintingEvaluator(Text2ImageEvaluator):
    """Evaluator for image-inpainting pipelines, registered as ``"image-inpainting"``.

    On construction it either generates reference images with ``base_model``
    or loads previously stored ground-truth data from a CSV file.
    """

    def __init__(
        self,
        base_model: Any = None,
        gt_data: str = None,
        test_data: Union[str, list] = None,
        metrics="similarity",
        similarity_model_id: str = "openai/clip-vit-large-patch14",
        num_inference_steps=4,
        crop_prompts=True,
        num_samples=None,
        gen_image_fn=None,
        seed=42,
        is_genai=False,
    ) -> None:
        """Configure the evaluator and prepare its ground-truth data.

        Args:
            base_model: Pipeline used to generate reference images. When it is
                falsy, ``gt_data`` is read as a CSV file instead.
            gt_data: Path to the ground-truth CSV; its directory is also where
                the ``reference`` image folder is created.
            test_data: Path to a CSV file, or a dict with ``prompts``,
                ``images`` and ``masks`` entries. When empty, the default
                dataset from ``prepare_default_data`` is used.
            metrics: Stored on the instance as ``self.metrics``.
            similarity_model_id: Model id passed to ``ImageSimilarity``.
            num_inference_steps: Forwarded to the generation function.
            crop_prompts: Stored on the instance as ``self.crop_prompt``.
            num_samples: Number of default-dataset samples to use.
            gen_image_fn: Optional replacement for the default generation function.
            seed: Seed applied before every generation call.
            is_genai: When true, an ``openvino_genai.TorchGenerator`` is passed
                as the generator instead of a ``torch.Generator``.

        Raises:
            AssertionError: if neither ``base_model`` nor ``gt_data`` is given.
        """
        # NOTE: the parent __init__ is intentionally not called, as in the
        # original implementation; all state is set up here.
        assert (
            base_model is not None or gt_data is not None
        ), "Inpainting pipeline for evaluation or ground truth data must be defined"

        self.test_data = test_data
        self.metrics = metrics
        self.crop_prompt = crop_prompts
        self.num_samples = num_samples
        self.num_inference_steps = num_inference_steps
        self.seed = seed
        self.similarity = ImageSimilarity(similarity_model_id)
        self.last_cmp = None
        # gt_data may legitimately be None when only base_model is supplied;
        # os.path.dirname(None) would raise TypeError, so fall back to the
        # current directory (dirname of a bare file name is "" as well).
        self.gt_dir = os.path.dirname(gt_data) if gt_data else ""
        self.generation_fn = gen_image_fn
        self.is_genai = is_genai
        self.resolution = None

        if base_model:
            self.gt_data = self._generate_data(
                base_model, gen_image_fn, os.path.join(self.gt_dir, "reference")
            )
        else:
            self.gt_data = pd.read_csv(gt_data, keep_default_na=False)

    def _generate_data(self, model, gen_image_fn=None, image_dir="reference"):
        """Run ``model`` over the test samples and save one image per sample.

        Args:
            model: Pipeline handed to the generation function.
            gen_image_fn: Optional callable with the same signature as the
                default generation function defined below.
            image_dir: Directory that receives ``<index>.png`` files; created
                if it does not exist.

        Returns:
            A ``pandas.DataFrame`` with a ``prompts`` column and an ``images``
            column holding the paths of the saved images.

        Raises:
            ValueError: if ``self.test_data`` is neither a path nor a dict.
        """
        def default_gen_image_fn(model, prompt, image, mask, num_inference_steps, generator=None):
            with torch.no_grad():
                output = model(
                    prompt,
                    image=image,
                    mask_image=mask,
                    num_inference_steps=num_inference_steps,
                    output_type="pil",
                    generator=generator,
                )
            return output.images[0]

        generation_fn = gen_image_fn or default_gen_image_fn

        if self.test_data:
            if isinstance(self.test_data, str):
                data = pd.read_csv(self.test_data)
            elif isinstance(self.test_data, dict):
                assert "prompts" in self.test_data
                assert "images" in self.test_data
                assert "masks" in self.test_data
                data = pd.DataFrame.from_dict(dict(self.test_data))
            else:
                # Inpainting needs prompts, images and masks together, so a
                # bare sequence cannot be turned into test data.
                raise ValueError(
                    "test_data must be a path to a CSV file or a dict with "
                    "'prompts', 'images' and 'masks' entries, "
                    f"got {type(self.test_data).__name__}"
                )
        else:
            data = pd.DataFrame.from_dict(prepare_default_data(self.num_samples))

        prompts = data["prompts"]
        images = data["images"]
        masks = data["masks"]
        output_images = []
        rng = torch.Generator(device="cpu")

        os.makedirs(image_dir, exist_ok=True)

        for i, (prompt, image, mask) in tqdm(
            enumerate(zip(prompts, images, masks)),
            total=len(prompts),
            desc="Evaluate pipeline",
        ):
            # Re-seed before every sample so each image is generated from the
            # same random state regardless of its position in the data.
            set_seed(self.seed)
            rng = rng.manual_seed(self.seed)
            output = generation_fn(
                model,
                prompt,
                image=image,
                mask=mask,
                num_inference_steps=self.num_inference_steps,
                generator=openvino_genai.TorchGenerator(self.seed) if self.is_genai else rng,
            )
            image_path = os.path.join(image_dir, f"{i}.png")
            output.save(image_path)
            output_images.append(image_path)

        res_data = {"prompts": list(prompts), "images": output_images}
        df = pd.DataFrame(res_data)

        return df
57 changes: 54 additions & 3 deletions tools/who_what_benchmark/whowhatbench/model_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json

from transformers import AutoConfig, AutoModelForCausalLM, AutoModel, AutoModelForVision2Seq
from diffusers import DiffusionPipeline, AutoPipelineForImage2Image
from diffusers import DiffusionPipeline, AutoPipelineForImage2Image, AutoPipelineForInpainting


logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -107,7 +107,7 @@ def load_text2image_model(

try:
model = TEXT2IMAGEPipeline.from_pretrained(
model_id, trust_remote_code=True, device=device, ov_config=ov_config
model_id, trust_remote_code=True, device=device, ov_config=ov_config, safety_checker=None,
)
except ValueError:
config = AutoConfig.from_pretrained(
Expand All @@ -119,6 +119,7 @@ def load_text2image_model(
use_cache=True,
device=device,
ov_config=ov_config,
safety_checker=None,
)

return model
Expand Down Expand Up @@ -211,7 +212,7 @@ def load_imagetext2image_model(
from optimum.intel.openvino import OVPipelineForImage2Image
try:
model = OVPipelineForImage2Image.from_pretrained(
model_id, trust_remote_code=True, device=device, ov_config=ov_config
model_id, trust_remote_code=True, device=device, ov_config=ov_config, safety_checker=None,
)
except ValueError:
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
Expand All @@ -222,6 +223,54 @@ def load_imagetext2image_model(
use_cache=True,
device=device,
ov_config=ov_config,
safety_checker=None,
)
return model


def load_inpainting_genai_pipeline(model_dir, device="CPU", ov_config=None):
    """Create an OpenVINO GenAI inpainting pipeline wrapped in ``GenAIModelWrapper``.

    Args:
        model_dir: Model directory passed to ``openvino_genai.InpaintingPipeline``.
        device: Device name passed to the pipeline.
        ov_config: Optional mapping of extra properties expanded as keyword
            arguments of the pipeline; ``None`` means no extra properties.

    Returns:
        A ``GenAIModelWrapper`` around the pipeline, tagged ``"image-inpainting"``.

    The process exits with code -1 when ``openvino_genai`` cannot be imported.
    """
    try:
        import openvino_genai
    except ImportError as e:
        # The exception must go through a %s placeholder; passing it as a bare
        # extra argument makes logging fail to format the record.
        logger.error("Failed to import openvino_genai package. Please install it. Details:\n%s", e)
        exit(-1)

    return GenAIModelWrapper(
        # ov_config defaults to None, which cannot be unpacked with **.
        openvino_genai.InpaintingPipeline(model_dir, device, **(ov_config or {})),
        model_dir,
        "image-inpainting",
    )


def load_inpainting_model(
    model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False
):
    """Load an inpainting pipeline through the selected backend.

    Args:
        model_id: Model id or path handed to the backend loader.
        device: Device name for the OpenVINO-based backends.
        ov_config: OpenVINO configuration for the OpenVINO-based backends.
        use_hf: Load with diffusers' ``AutoPipelineForInpainting``.
        use_genai: Load with OpenVINO GenAI (only checked when ``use_hf`` is false).

    Returns:
        The loaded pipeline. When neither flag is set, Optimum's
        ``OVPipelineForInpainting`` is used; if that raises ``ValueError`` the
        load is retried with an explicitly loaded ``AutoConfig``.
    """
    if use_hf:
        logger.info("Using HF Transformers API")
        model = AutoPipelineForInpainting.from_pretrained(
            model_id, trust_remote_code=True
        )
    elif use_genai:
        logger.info("Using OpenVINO GenAI API")
        model = load_inpainting_genai_pipeline(model_id, device, ov_config)
    else:
        logger.info("Using Optimum API")
        from optimum.intel.openvino import OVPipelineForInpainting
        try:
            model = OVPipelineForInpainting.from_pretrained(
                model_id, trust_remote_code=True, device=device, ov_config=ov_config, safety_checker=None,
            )
        except ValueError as e:
            # The exception must go through a %s placeholder; passing it as a
            # bare extra argument makes logging fail to format the record.
            logger.error("Failed to load inpainting pipeline. Details:\n%s", e)
            config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
            model = OVPipelineForInpainting.from_pretrained(
                model_id,
                config=config,
                trust_remote_code=True,
                use_cache=True,
                device=device,
                ov_config=ov_config,
                safety_checker=None,
            )
    return model

Expand All @@ -248,5 +297,7 @@ def load_model(
return load_visual_text_model(model_id, device, ov_options, use_hf, use_genai)
elif model_type == "image-to-image":
return load_imagetext2image_model(model_id, device, ov_options, use_hf, use_genai)
elif model_type == "image-inpainting":
return load_inpainting_model(model_id, device, ov_options, use_hf, use_genai)
else:
raise ValueError(f"Unsupported model type: {model_type}")
27 changes: 26 additions & 1 deletion tools/who_what_benchmark/whowhatbench/wwb.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def parse_args():
parser.add_argument(
"--model-type",
type=str,
choices=["text", "text-to-image", "visual-text", "image-to-image"],
choices=["text", "text-to-image", "visual-text", "image-to-image", "image-inpainting"],
default="text",
help="Indicated the model type: 'text' - for causal text generation, 'text-to-image' - for image generation, "
"visual-text - for Visual Language Models, image-to-image - for image generation based on image and prompt",
Expand Down Expand Up @@ -282,6 +282,20 @@ def genai_gen_image2image(model, prompt, image, num_inference_steps, generator=N
return image


def genai_gen_inpainting(model, prompt, image, mask, num_inference_steps, generator=None):
    """Generate one inpainted image with an OpenVINO GenAI pipeline.

    Args:
        model: Object whose ``generate`` method runs the inpainting pipeline.
        prompt: Text prompt passed to ``generate``.
        image: PIL image to inpaint.
        mask: PIL image used as the inpainting mask.
        num_inference_steps: Forwarded to ``generate``.
        generator: Random generator forwarded to ``generate``.

    Returns:
        A PIL image built from the first entry of the generated tensor.
    """
    # The tensors are laid out as (1, height, width, 3). Converting to RGB
    # first keeps that layout valid for grayscale or RGBA inputs (masks are
    # commonly single-channel), which the fixed 3-channel reshape rejected.
    # Converting the image directly also avoids a slow per-pixel copy.
    image_array = np.expand_dims(np.array(image.convert("RGB"), dtype=np.uint8), axis=0)
    mask_array = np.expand_dims(np.array(mask.convert("RGB"), dtype=np.uint8), axis=0)
    image_data = ov.Tensor(image_array)
    mask_data = ov.Tensor(mask_array)
    image_tensor = model.generate(
        prompt,
        image=image_data,
        mask_image=mask_data,
        num_inference_steps=num_inference_steps,
        generator=generator,
    )
    image = Image.fromarray(image_tensor.data[0])
    return image


def genai_gen_visual_text(model, prompt, image, processor, tokenizer, max_new_tokens, crop_question):
image_data = ov.Tensor(np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8))
config = model.get_generation_config()
Expand Down Expand Up @@ -355,6 +369,17 @@ def create_evaluator(base_model, args):
is_genai=args.genai,
seed=args.seed,
)
elif task == "image-inpainting":
return EvaluatorCLS(
base_model=base_model,
gt_data=args.gt_data,
test_data=prompts,
num_samples=args.num_samples,
num_inference_steps=args.num_inference_steps,
gen_image_fn=genai_gen_inpainting if args.genai else None,
is_genai=args.genai,
seed=args.seed,
)
else:
raise ValueError(f"Unsupported task: {task}")

Expand Down

0 comments on commit 3d6b1ad

Please sign in to comment.