Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support FluxFill inpainting pipeline #1095

Merged
merged 4 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 26 additions & 13 deletions optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
_torch_version,
_transformers_version,
compare_versions,
is_diffusers_version,
is_openvino_tokenizers_version,
is_tokenizers_version,
is_transformers_version,
Expand Down Expand Up @@ -988,24 +989,36 @@ def _get_submodels_and_export_configs(
def get_diffusion_models_for_export_ext(
pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
):
try:
from diffusers import (
StableDiffusion3Img2ImgPipeline,
StableDiffusion3InpaintPipeline,
StableDiffusion3Pipeline,
)
if is_diffusers_version(">=", "0.29.0"):
from diffusers import StableDiffusion3Img2ImgPipeline, StableDiffusion3Pipeline

is_sd3 = isinstance(
pipeline, (StableDiffusion3Pipeline, StableDiffusion3InpaintPipeline, StableDiffusion3Img2ImgPipeline)
)
except ImportError:
sd3_pipes = [StableDiffusion3Pipeline, StableDiffusion3Img2ImgPipeline]
if is_diffusers_version(">=", "0.30.0"):
from diffusers import StableDiffusion3InpaintPipeline

sd3_pipes.append(StableDiffusion3InpaintPipeline)
echarlaix marked this conversation as resolved.
Show resolved Hide resolved

is_sd3 = isinstance(pipeline, tuple(sd3_pipes))
else:
is_sd3 = False

try:
if is_diffusers_version(">=", "0.30.0"):
from diffusers import FluxPipeline

is_flux = isinstance(pipeline, FluxPipeline)
except ImportError:
flux_pipes = [FluxPipeline]

if is_diffusers_version(">=", "0.31.0"):
from diffusers import FluxImg2ImgPipeline, FluxInpaintPipeline

flux_pipes.extend([FluxPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline])

if is_diffusers_version(">=", "0.32.0"):
from diffusers import FluxFillPipeline

flux_pipes.append(FluxFillPipeline)

is_flux = isinstance(pipeline, tuple(flux_pipes))
else:
is_flux = False

if not is_sd3 and not is_flux:
Expand Down
11 changes: 10 additions & 1 deletion optimum/exporters/openvino/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,12 @@
)
from optimum.utils.normalized_config import NormalizedConfig, NormalizedTextConfig, NormalizedVisionConfig

from ...intel.utils.import_utils import _transformers_version, is_diffusers_version, is_transformers_version
from ...intel.utils.import_utils import (
_transformers_version,
is_diffusers_available,
is_diffusers_version,
is_transformers_version,
)
from .model_patcher import (
AquilaModelPatcher,
ArcticModelPatcher,
Expand Down Expand Up @@ -116,6 +121,10 @@ def init_model_configs():
"image-text-to-text"
] = TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS["text-generation"]

if is_diffusers_available() and "fill" not in TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS:
TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS["fill"] = "FluxFillPipeline"
TasksManager._DIFFUSERS_TASKS_TO_MODEL_MAPPINGS["fill"] = {"flux": "FluxFillPipeline"}
eaidova marked this conversation as resolved.
Show resolved Hide resolved

supported_model_types = [
"_SUPPORTED_MODEL_TYPE",
"_DIFFUSERS_SUPPORTED_MODEL_TYPE",
Expand Down
2 changes: 2 additions & 0 deletions optimum/intel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
"OVFluxPipeline",
"OVFluxImg2ImgPipeline",
"OVFluxInpaintPipeline",
"OVFluxFillPipeline",
"OVPipelineForImage2Image",
"OVPipelineForText2Image",
"OVPipelineForInpainting",
Expand All @@ -148,6 +149,7 @@
"OVFluxPipeline",
"OVFluxImg2ImgPipeline",
"OVFluxInpaintPipeline",
"OVFluxFillPipeline",
"OVPipelineForImage2Image",
"OVPipelineForText2Image",
"OVPipelineForInpainting",
Expand Down
1 change: 1 addition & 0 deletions optimum/intel/openvino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
if is_diffusers_available():
from .modeling_diffusion import (
OVDiffusionPipeline,
OVFluxFillPipeline,
OVFluxImg2ImgPipeline,
OVFluxInpaintPipeline,
OVFluxPipeline,
Expand Down
19 changes: 17 additions & 2 deletions optimum/intel/openvino/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@
FluxImg2ImgPipeline = object
FluxInpaintPipeline = object

if is_diffusers_version(">=", "0.32.0"):
from diffusers import FluxFillPipeline
else:
FluxFillPipeline = object


DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER = "transformer"
DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER = "text_encoder_3"
Expand Down Expand Up @@ -1452,17 +1457,23 @@ class OVFluxPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxPip


class OVFluxImg2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxImg2ImgPipeline):
main_input_name = "prompt"
main_input_name = "image"
export_feature = "image-to-image"
auto_model_class = FluxImg2ImgPipeline


class OVFluxInpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxInpaintPipeline):
main_input_name = "prompt"
main_input_name = "image"
export_feature = "inpainting"
auto_model_class = FluxInpaintPipeline


class OVFluxFillPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxFillPipeline):
    """OpenVINO pipeline class paired with diffusers' ``FluxFillPipeline``.

    ``FluxFillPipeline`` is imported from diffusers only when the installed
    diffusers version is >= 0.32.0; on older versions the name is bound to
    ``object`` in this module, so this class can still be defined. The class is
    added to ``OV_INPAINT_PIPELINES_MAPPING`` under the ``"flux-fill"`` key and
    to ``SUPPORTED_OV_PIPELINES`` behind the same version check.
    """

    # Same value as the other Flux image-conditioned OV pipelines in this module.
    # NOTE(review): presumably names the primary argument of the pipeline call —
    # confirm against how OVDiffusionPipeline consumes it.
    main_input_name = "image"
    # Shares the "inpainting" export feature with OVFluxInpaintPipeline.
    export_feature = "inpainting"
    # Diffusers class this OV pipeline corresponds to; the dispatch tests compare
    # it with the class of the loaded diffusers pipeline.
    auto_model_class = FluxFillPipeline


SUPPORTED_OV_PIPELINES = [
OVStableDiffusionPipeline,
OVStableDiffusionImg2ImgPipeline,
Expand Down Expand Up @@ -1531,6 +1542,10 @@ def _get_ov_class(pipeline_class_name: str, throw_error_if_not_exist: bool = Tru
OV_INPAINT_PIPELINES_MAPPING["flux"] = OVFluxInpaintPipeline
OV_IMAGE2IMAGE_PIPELINES_MAPPING["flux"] = OVFluxImg2ImgPipeline

if is_diffusers_version(">=", "0.32.0"):
OV_INPAINT_PIPELINES_MAPPING["flux-fill"] = OVFluxFillPipeline
SUPPORTED_OV_PIPELINES.append(OVFluxFillPipeline)

SUPPORTED_OV_PIPELINES_MAPPINGS = [
OV_TEXT2IMAGE_PIPELINES_MAPPING,
OV_IMAGE2IMAGE_PIPELINES_MAPPING,
Expand Down
1 change: 1 addition & 0 deletions optimum/intel/openvino/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
"stable-diffusion-xl": "OVStableDiffusionXLPipeline",
"stable-diffusion-3": "OVStableDiffusion3Pipeline",
"flux": "OVFluxPipeline",
"flux-fill": "OVFluxFillPipeline",
"pix2struct": "OVModelForPix2Struct",
"latent-consistency": "OVLatentConsistencyModelPipeline",
"open_clip_text": "OVModelOpenCLIPText",
Expand Down
11 changes: 11 additions & 0 deletions optimum/intel/utils/dummy_openvino_and_diffusers_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,14 @@ def __init__(self, *args, **kwargs):
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["openvino", "diffusers"])


class OVFluxFillPipeline(metaclass=DummyObject):
    """Placeholder for ``OVFluxFillPipeline`` in the dummy-objects module.

    Both construction and ``from_pretrained`` do nothing except call
    ``requires_backends`` with the ``openvino`` and ``diffusers`` backends.
    NOTE(review): ``requires_backends`` is defined elsewhere; presumably it
    raises an informative error when a backend is missing — confirm.
    """

    # Backends this placeholder stands in for; kept in sync with the lists
    # passed to `requires_backends` below.
    _backends = ["openvino", "diffusers"]

    def __init__(self, *args, **kwargs):
        # Arguments are accepted for signature compatibility and not used here.
        requires_backends(self, ["openvino", "diffusers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        # Same backend check as __init__, applied to the class itself.
        requires_backends(cls, ["openvino", "diffusers"])
42 changes: 31 additions & 11 deletions tests/openvino/test_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,13 +667,14 @@ class OVPipelineForInpaintingTest(unittest.TestCase):
if is_transformers_version(">=", "4.40.0"):
SUPPORTED_ARCHITECTURES.append("stable-diffusion-3")
SUPPORTED_ARCHITECTURES.append("flux")
SUPPORTED_ARCHITECTURES.append("flux-fill")

AUTOMODEL_CLASS = AutoPipelineForInpainting
OVMODEL_CLASS = OVPipelineForInpainting

TASK = "inpainting"

def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="pil"):
def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="pil", model_arch=""):
inputs = _generate_prompts(batch_size=batch_size)

inputs["image"] = _generate_images(
Expand All @@ -683,7 +684,8 @@ def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_
height=height, width=width, batch_size=batch_size, channel=1, input_type=input_type
)

inputs["strength"] = 0.75
if model_arch != "flux-fill":
inputs["strength"] = 0.75
inputs["height"] = height
inputs["width"] = width

Expand All @@ -699,7 +701,12 @@ def test_load_vanilla_model_which_is_not_supported(self):
@parameterized.expand(SUPPORTED_ARCHITECTURES)
@require_diffusers
def test_ov_pipeline_class_dispatch(self, model_arch: str):
auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
if model_arch != "flux-fill":
auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
else:
from diffusers import FluxFillPipeline

auto_pipeline = FluxFillPipeline.from_pretrained(MODEL_NAMES[model_arch])
ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])

self.assertEqual(ov_pipeline.auto_model_class, auto_pipeline.__class__)
Expand All @@ -713,7 +720,9 @@ def test_num_images_per_prompt(self, model_arch: str):
for height in [64, 128]:
for width in [64, 128]:
for num_images_per_prompt in [1, 3]:
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
inputs = self.generate_inputs(
height=height, width=width, batch_size=batch_size, model_arch=model_arch
)
outputs = pipeline(**inputs, num_images_per_prompt=num_images_per_prompt).images
self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, height, width, 3))

Expand Down Expand Up @@ -752,7 +761,9 @@ def test_shape(self, model_arch: str):
height, width, batch_size = 128, 64, 1

for input_type in ["pil", "np", "pt"]:
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type)
inputs = self.generate_inputs(
height=height, width=width, batch_size=batch_size, input_type=input_type, model_arch=model_arch
)

for output_type in ["pil", "np", "pt", "latent"]:
inputs["output_type"] = output_type
Expand All @@ -764,7 +775,7 @@ def test_shape(self, model_arch: str):
elif output_type == "pt":
self.assertEqual(outputs.shape, (batch_size, 3, height, width))
else:
if model_arch != "flux":
if not model_arch.startswith("flux"):
out_channels = (
pipeline.unet.config.out_channels
if pipeline.unet is not None
Expand All @@ -782,17 +793,26 @@ def test_shape(self, model_arch: str):
else:
packed_height = height // pipeline.vae_scale_factor // 2
packed_width = width // pipeline.vae_scale_factor // 2
channels = pipeline.transformer.config.in_channels
channels = (
pipeline.transformer.config.in_channels
if model_arch != "flux-fill"
else pipeline.transformer.out_channels
)
self.assertEqual(outputs.shape, (batch_size, packed_height * packed_width, channels))

@parameterized.expand(SUPPORTED_ARCHITECTURES)
@require_diffusers
def test_compare_to_diffusers_pipeline(self, model_arch: str):
ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
if model_arch != "flux-fill":
diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
else:
from diffusers import FluxFillPipeline

diffusers_pipeline = FluxFillPipeline.from_pretrained(MODEL_NAMES[model_arch])

height, width, batch_size = 64, 64, 1
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch)

for output_type in ["latent", "np", "pt"]:
inputs["output_type"] = output_type
Expand All @@ -804,7 +824,7 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str):

# test generation when the input resolution is not divisible by 64
height, width, batch_size = 96, 96, 1
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch)

for output_type in ["latent", "np", "pt"]:
inputs["output_type"] = output_type
Expand All @@ -820,7 +840,7 @@ def test_image_reproducibility(self, model_arch: str):
pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])

height, width, batch_size = 64, 64, 1
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch)

for generator_framework in ["np", "pt"]:
ov_outputs_1 = pipeline(**inputs, generator=get_generator(generator_framework, SEED))
Expand Down
6 changes: 5 additions & 1 deletion tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

from optimum.exporters.openvino.__main__ import main_export
from optimum.intel import ( # noqa
OVFluxFillPipeline,
OVFluxPipeline,
OVLatentConsistencyModelPipeline,
OVModelForAudioClassification,
Expand Down Expand Up @@ -82,7 +83,9 @@ class OVCLIExportTestCase(unittest.TestCase):
]

if is_transformers_version(">=", "4.45"):
SUPPORTED_ARCHITECTURES.extend([("text-to-image", "stable-diffusion-3"), ("text-to-image", "flux")])
SUPPORTED_ARCHITECTURES.extend(
[("text-to-image", "stable-diffusion-3"), ("text-to-image", "flux"), ("inpainting", "flux-fill")]
)
EXPECTED_NUMBER_OF_TOKENIZER_MODELS = {
"gpt2": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
"t5": 0, # no .model file in the repository
Expand All @@ -97,6 +100,7 @@ class OVCLIExportTestCase(unittest.TestCase):
"stable-diffusion-xl": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
"stable-diffusion-3": 6 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 2,
"flux": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
"flux-fill": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
"llava": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
}

Expand Down
2 changes: 2 additions & 0 deletions tests/openvino/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"falcon-40b": "katuni4ka/tiny-random-falcon-40b",
"flaubert": "hf-internal-testing/tiny-random-flaubert",
"flux": "katuni4ka/tiny-random-flux",
"flux-fill": "katuni4ka/tiny-random-flux-fill",
"gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
"gpt2": "hf-internal-testing/tiny-random-gpt2",
"gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
Expand Down Expand Up @@ -191,6 +192,7 @@
"open-clip": (20, 28),
"stable-diffusion-3": (66, 42, 58, 30),
"flux": (56, 24, 28, 64),
"flux-fill": (56, 24, 28, 64),
"llava": (30, 9, 1),
"llava_next": (30, 9, 1),
"minicpmv": (30, 26, 1, 6),
Expand Down