Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WWB]: Added validation for Inpainting pipeline #1451

Merged
merged 8 commits into from
Dec 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions tools/who_what_benchmark/tests/test_cli_image.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import itertools
import subprocess # nosec B404
import os
import shutil
Expand All @@ -9,6 +10,9 @@
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

MODEL_CACHE = tempfile.mkdtemp()
OV_IMAGE_MODELS = ["OpenVINO/stable-diffusion-v1-5-int8-ov"]


def run_wwb(args):
logger.info(" ".join(["TRANSFOREMRS_VERBOSITY=debug wwb"] + args))
Expand All @@ -17,6 +21,19 @@ def run_wwb(args):
return result


def setup_module():
    """Download every model in OV_IMAGE_MODELS into the shared MODEL_CACHE.

    Runs once before the tests in this module. Fails fast when a download
    does not succeed so later tests never run against a partial cache
    (the pre-refactor per-test code asserted the returncode; keep that check).
    """
    for model_id in OV_IMAGE_MODELS:
        model_path = os.path.join(MODEL_CACHE, model_id.replace("/", "--"))
        result = subprocess.run(
            ["huggingface-cli", "download", model_id, "--local-dir", model_path],
            capture_output=True,
            text=True,
        )
        assert result.returncode == 0, (
            f"Failed to download {model_id}: {result.stderr}"
        )


def teardown_module():
    """Remove the shared model cache after the last test in this module."""
    logger.info("Remove models")
    shutil.rmtree(MODEL_CACHE)


@pytest.mark.parametrize(
("model_id", "model_type", "backend"),
[
Expand All @@ -25,6 +42,8 @@ def run_wwb(args):
("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "hf"),
("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "openvino"),
("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "text-to-image", "hf"),
("hf-internal-testing/tiny-stable-diffusion-torch", "image-inpainting", "hf"),
("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "image-inpainting", "hf"),
],
)
def test_image_model_types(model_id, model_type, backend):
Expand Down Expand Up @@ -68,21 +87,13 @@ def test_image_model_types(model_id, model_type, backend):

@pytest.mark.parametrize(
("model_id", "model_type"),
[
("OpenVINO/LCM_Dreamshaper_v7-int8-ov", "image-to-image"),
("OpenVINO/LCM_Dreamshaper_v7-int8-ov", "text-to-image"),
],
list(itertools.product(OV_IMAGE_MODELS,
["image-to-image", "text-to-image", "image-inpainting"])),
)
def test_image_model_genai(model_id, model_type):
with tempfile.TemporaryDirectory() as temp_dir:
GT_FILE = os.path.join(temp_dir, "gt.csv")
MODEL_PATH = os.path.join(temp_dir, model_id.replace("/", "--"))

result = subprocess.run(["huggingface-cli", "download",
model_id, "--local-dir",
MODEL_PATH],
capture_output=True, text=True)
assert result.returncode == 0
MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--"))

wwb_args = [
"--base-model",
Expand Down Expand Up @@ -169,7 +180,6 @@ def test_image_model_genai(model_id, model_type):

shutil.rmtree("reference", ignore_errors=True)
shutil.rmtree("target", ignore_errors=True)
shutil.rmtree(MODEL_PATH, ignore_errors=True)
shutil.rmtree(output_dir, ignore_errors=True)


Expand Down
4 changes: 3 additions & 1 deletion tools/who_what_benchmark/whowhatbench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from .text_evaluator import TextEvaluator as Evaluator
from .text2image_evaluator import Text2ImageEvaluator
from .visualtext_evaluator import VisualTextEvaluator
from .image2image import Image2ImageEvaluator
from .im2im_evaluator import Image2ImageEvaluator
from .inpaint_evaluator import InpaintingEvaluator


__all__ = [
Expand All @@ -13,5 +14,6 @@
"Text2ImageEvaluator",
"VisualTextEvaluator",
"Image2ImageEvaluator",
"InpaintingEvaluator",
"EVALUATOR_REGISTRY",
]
133 changes: 133 additions & 0 deletions tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import os
from typing import Any, Union

import datasets
import pandas as pd
from tqdm import tqdm
from transformers import set_seed
import torch
import openvino_genai

from .registry import register_evaluator
from .text2image_evaluator import Text2ImageEvaluator

from .whowhat_metrics import ImageSimilarity


def preprocess_fn(example):
    """Rename one InpaintCOCO record's fields to the evaluator's column names."""
    column_map = (
        ("prompts", "inpaint_caption"),
        ("images", "coco_image"),
        ("masks", "mask"),
    )
    return {dst: example[src] for dst, src in column_map}


def prepare_default_data(num_samples=None):
    """Stream the InpaintCOCO test split, keeping only captioned samples.

    Returns a lazily-mapped streaming dataset whose columns are exactly
    'prompts', 'images', and 'masks' (original columns are dropped).
    """
    dataset_name = "phiyodr/InpaintCOCO"
    sample_count = 10 if num_samples is None else num_samples
    set_seed(42)
    stream = datasets.load_dataset(dataset_name, split="test", streaming=True)
    captioned = stream.filter(lambda example: example["inpaint_caption"] != "")
    subset = captioned.take(sample_count)
    return subset.map(preprocess_fn, remove_columns=subset.column_names)


@register_evaluator("image-inpainting")
class InpaintingEvaluator(Text2ImageEvaluator):
    """Evaluator for image-inpainting pipelines.

    Generates inpainted images for (prompt, image, mask) triples and compares
    them to reference images via CLIP-based similarity; scoring and comparison
    logic is inherited from Text2ImageEvaluator.
    """

    def __init__(
        self,
        base_model: Any = None,
        gt_data: str = None,
        test_data: Union[str, list] = None,
        metrics="similarity",
        similarity_model_id: str = "openai/clip-vit-large-patch14",
        num_inference_steps=4,
        crop_prompts=True,
        num_samples=None,
        gen_image_fn=None,
        seed=42,
        is_genai=False,
    ) -> None:
        assert (
            base_model is not None or gt_data is not None
        ), "Inpainting pipeline for evaluation or ground truth data must be defined"

        self.test_data = test_data
        self.metrics = metrics
        self.crop_prompt = crop_prompts
        self.num_samples = num_samples
        self.num_inference_steps = num_inference_steps
        self.seed = seed
        self.similarity = ImageSimilarity(similarity_model_id)
        self.last_cmp = None
        # gt_data may be None when references are generated from base_model;
        # fall back to the current directory instead of crashing in dirname().
        self.gt_dir = os.path.dirname(gt_data) if gt_data else "."
        self.generation_fn = gen_image_fn
        self.is_genai = is_genai
        self.resolution = None

        if base_model:
            self.gt_data = self._generate_data(
                base_model, gen_image_fn, os.path.join(self.gt_dir, "reference")
            )
        else:
            self.gt_data = pd.read_csv(gt_data, keep_default_na=False)

    def _generate_data(self, model, gen_image_fn=None, image_dir="reference"):
        """Run the pipeline over the test data and save the generated images.

        Returns a DataFrame with columns 'prompts' and 'images', where
        'images' holds the paths of the PNGs written under image_dir.
        """
        def default_gen_image_fn(model, prompt, image, mask, num_inference_steps, generator=None):
            # Diffusers-style inpainting call; returns the first PIL image.
            with torch.no_grad():
                output = model(
                    prompt,
                    image=image,
                    mask_image=mask,
                    num_inference_steps=num_inference_steps,
                    output_type="pil",
                    generator=generator,
                )
            return output.images[0]

        generation_fn = gen_image_fn or default_gen_image_fn

        if self.test_data:
            if isinstance(self.test_data, str):
                data = pd.read_csv(self.test_data)
            elif isinstance(self.test_data, dict):
                assert "prompts" in self.test_data
                assert "images" in self.test_data
                assert "masks" in self.test_data
                data = pd.DataFrame.from_dict(dict(self.test_data))
            else:
                raise ValueError(
                    "test_data must be a CSV path or a dict with "
                    "'prompts', 'images' and 'masks' keys"
                )
        else:
            data = pd.DataFrame.from_dict(prepare_default_data(self.num_samples))

        prompts = data["prompts"]
        images = data["images"]
        masks = data["masks"]
        output_images = []
        rng = torch.Generator(device="cpu")

        os.makedirs(image_dir, exist_ok=True)

        for i, (prompt, image, mask) in tqdm(enumerate(zip(prompts, images, masks)), desc="Evaluate pipeline"):
            # Re-seed before every sample so each image is reproducible
            # independently of iteration order.
            set_seed(self.seed)
            rng = rng.manual_seed(self.seed)
            output = generation_fn(
                model,
                prompt,
                image=image,
                mask=mask,
                num_inference_steps=self.num_inference_steps,
                # GenAI pipelines require their own generator type.
                generator=openvino_genai.TorchGenerator(self.seed) if self.is_genai else rng,
            )
            image_path = os.path.join(image_dir, f"{i}.png")
            output.save(image_path)
            output_images.append(image_path)

        return pd.DataFrame({"prompts": list(prompts), "images": output_images})
57 changes: 54 additions & 3 deletions tools/who_what_benchmark/whowhatbench/model_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json

from transformers import AutoConfig, AutoModelForCausalLM, AutoModel, AutoModelForVision2Seq
from diffusers import DiffusionPipeline, AutoPipelineForImage2Image
from diffusers import DiffusionPipeline, AutoPipelineForImage2Image, AutoPipelineForInpainting


logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -107,7 +107,7 @@ def load_text2image_model(

try:
model = TEXT2IMAGEPipeline.from_pretrained(
model_id, trust_remote_code=True, device=device, ov_config=ov_config
model_id, trust_remote_code=True, device=device, ov_config=ov_config, safety_checker=None,
)
except ValueError:
config = AutoConfig.from_pretrained(
Expand All @@ -119,6 +119,7 @@ def load_text2image_model(
use_cache=True,
device=device,
ov_config=ov_config,
safety_checker=None,
)

return model
Expand Down Expand Up @@ -211,7 +212,7 @@ def load_imagetext2image_model(
from optimum.intel.openvino import OVPipelineForImage2Image
try:
model = OVPipelineForImage2Image.from_pretrained(
model_id, trust_remote_code=True, device=device, ov_config=ov_config
model_id, trust_remote_code=True, device=device, ov_config=ov_config, safety_checker=None,
)
except ValueError:
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
Expand All @@ -222,6 +223,54 @@ def load_imagetext2image_model(
use_cache=True,
device=device,
ov_config=ov_config,
safety_checker=None,
)
return model


def load_inpainting_genai_pipeline(model_dir, device="CPU", ov_config=None):
    """Load an OpenVINO GenAI inpainting pipeline wrapped for WWB evaluation.

    Args:
        model_dir: Directory with the exported OpenVINO model.
        device: OpenVINO device name.
        ov_config: Optional plugin configuration dict (may be None).

    Exits the process if openvino_genai is not installed.
    """
    try:
        import openvino_genai
    except ImportError as e:
        # Use lazy %-formatting: the original trailing positional arg had no
        # placeholder and was dropped by logging.
        logger.error("Failed to import openvino_genai package. Please install it. Details: %s", e)
        exit(-1)

    return GenAIModelWrapper(
        # ov_config defaults to None; **None raises TypeError, so guard it.
        openvino_genai.InpaintingPipeline(model_dir, device, **(ov_config or {})),
        model_dir,
        "image-inpainting"
    )


def load_inpainting_model(
    model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False
):
    """Load an inpainting pipeline via HF diffusers, OpenVINO GenAI, or Optimum.

    Args:
        model_id: Hugging Face hub id or local path of the model.
        device: OpenVINO device name (ignored for the HF backend).
        ov_config: Optional OpenVINO plugin configuration dict.
        use_hf: Load with diffusers' AutoPipelineForInpainting.
        use_genai: Load with openvino_genai's InpaintingPipeline.

    Returns:
        The loaded pipeline object.
    """
    if use_hf:
        logger.info("Using HF Transformers API")
        model = AutoPipelineForInpainting.from_pretrained(
            model_id, trust_remote_code=True
        )
    elif use_genai:
        logger.info("Using OpenVINO GenAI API")
        model = load_inpainting_genai_pipeline(model_id, device, ov_config)
    else:
        logger.info("Using Optimum API")
        from optimum.intel.openvino import OVPipelineForInpainting
        try:
            model = OVPipelineForInpainting.from_pretrained(
                model_id, trust_remote_code=True, device=device, ov_config=ov_config, safety_checker=None,
            )
        except ValueError as e:
            # Retry with an explicit config for models whose pipeline type
            # cannot be auto-detected. (Also: fixed "inpaiting" typo and use
            # %-formatting so the exception actually appears in the log.)
            logger.error("Failed to load inpainting pipeline. Details: %s", e)
            config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
            model = OVPipelineForInpainting.from_pretrained(
                model_id,
                config=config,
                trust_remote_code=True,
                use_cache=True,
                device=device,
                ov_config=ov_config,
                safety_checker=None,
            )
    return model

Expand All @@ -248,5 +297,7 @@ def load_model(
return load_visual_text_model(model_id, device, ov_options, use_hf, use_genai)
elif model_type == "image-to-image":
return load_imagetext2image_model(model_id, device, ov_options, use_hf, use_genai)
elif model_type == "image-inpainting":
return load_inpainting_model(model_id, device, ov_options, use_hf, use_genai)
else:
raise ValueError(f"Unsupported model type: {model_type}")
27 changes: 26 additions & 1 deletion tools/who_what_benchmark/whowhatbench/wwb.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def parse_args():
parser.add_argument(
"--model-type",
type=str,
choices=["text", "text-to-image", "visual-text", "image-to-image"],
choices=["text", "text-to-image", "visual-text", "image-to-image", "image-inpainting"],
default="text",
help="Indicated the model type: 'text' - for causal text generation, 'text-to-image' - for image generation, "
"visual-text - for Visual Language Models, image-to-image - for image generation based on image and prompt",
Expand Down Expand Up @@ -282,6 +282,20 @@ def genai_gen_image2image(model, prompt, image, num_inference_steps, generator=N
return image


def genai_gen_inpainting(model, prompt, image, mask, num_inference_steps, generator=None):
    """Generate an inpainted image with an OpenVINO GenAI pipeline.

    Converts the PIL image and mask to NHWC uint8 openvino Tensors, runs
    generation, and returns the first result as a PIL image.
    """
    def to_ov_tensor(pil_img):
        # NOTE(review): the reshape assumes a 3-channel image; a single-channel
        # ("L" mode) mask would fail here — confirm mask mode with callers.
        width, height = pil_img.size
        pixels = np.array(pil_img.getdata()).reshape(1, height, width, 3)
        return ov.Tensor(pixels.astype(np.uint8))

    image_tensor = model.generate(
        prompt,
        image=to_ov_tensor(image),
        mask_image=to_ov_tensor(mask),
        num_inference_steps=num_inference_steps,
        generator=generator,
    )
    return Image.fromarray(image_tensor.data[0])


def genai_gen_visual_text(model, prompt, image, processor, tokenizer, max_new_tokens, crop_question):
image_data = ov.Tensor(np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8))
config = model.get_generation_config()
Expand Down Expand Up @@ -355,6 +369,17 @@ def create_evaluator(base_model, args):
is_genai=args.genai,
seed=args.seed,
)
elif task == "image-inpainting":
return EvaluatorCLS(
base_model=base_model,
gt_data=args.gt_data,
test_data=prompts,
num_samples=args.num_samples,
num_inference_steps=args.num_inference_steps,
gen_image_fn=genai_gen_inpainting if args.genai else None,
is_genai=args.genai,
seed=args.seed,
)
else:
raise ValueError(f"Unsupported task: {task}")

Expand Down
Loading