diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index 1b880e736c..5013db482e 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -33,6 +33,7 @@ StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline, ) +from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, is_invisible_watermark_available from huggingface_hub import snapshot_download @@ -90,6 +91,7 @@ def __init__( tokenizer: Optional["CLIPTokenizer"] = None, tokenizer_2: Optional["CLIPTokenizer"] = None, feature_extractor: Optional["CLIPFeatureExtractor"] = None, + safety_checker: Optional["StableDiffusionSafetyChecker"] = None, device: str = "CPU", dynamic_shapes: bool = True, compile: bool = True, @@ -135,7 +137,7 @@ def __init__( self.tokenizer_2 = tokenizer_2 self.scheduler = scheduler self.feature_extractor = feature_extractor - self.safety_checker = None + self.safety_checker = safety_checker self.preprocessors = [] if self.is_dynamic: @@ -1082,6 +1084,22 @@ def __call__( **kwargs, ) + def run_safety_checker(self, image: np.ndarray): + if self.safety_checker is None: + has_nsfw_concept = None + else: + # Transpose the image to NHWC + image = image.transpose(0, 2, 3, 1) + + feature_extractor_input = self.image_processor.numpy_to_pil(image) + safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt") + image, has_nsfw_concept = self.safety_checker(images=image, clip_input=safety_checker_input.pixel_values) + + # Transpose the image back to NCHW + image = image.transpose(0, 3, 1, 2) + + return image, has_nsfw_concept + def _raise_invalid_batch_size( expected_batch_size: int, batch_size: int, num_images_per_prompt: int, guidance_scale: float