
Commit

Merge pull request #700 from nikita-savelyevv/refactor-sd-calibration-data-collection

Refactor SD calibration data collection
AlexKoff88 authored May 8, 2024
2 parents 9bb4334 + 068236d commit e6fadb1
Showing 5 changed files with 213 additions and 148 deletions.
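At a glance: this change moves calibration-data collection for hybrid quantization out of OVStableDiffusionPipeline.from_pretrained and into OVQuantizer. Below is a minimal sketch of the resulting user-facing flow, assembled from the notebook diff that follows; MODEL_ID, NUM_SAMPLES, calibration_dataset, and int8_model_path are assumed to be defined in earlier notebook cells.

    from optimum.intel import OVConfig, OVQuantizer, OVStableDiffusionPipeline, OVWeightQuantizationConfig
    from optimum.intel.openvino.configuration import OVQuantizationMethod

    # MODEL_ID, NUM_SAMPLES, calibration_dataset, and int8_model_path are assumed
    # to come from earlier notebook cells (not shown in this diff).

    # Export the pipeline to OpenVINO without quantizing it at load time.
    int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_ID, export=True)

    # Hybrid mode is now selected explicitly via quant_method rather than
    # implied by passing a dataset to the weight-quantization config.
    quantization_config = OVWeightQuantizationConfig(
        bits=8, num_samples=NUM_SAMPLES, quant_method=OVQuantizationMethod.HYBRID
    )

    # Calibration data is now consumed by OVQuantizer.quantize() directly.
    quantizer = OVQuantizer(int8_pipe)
    quantizer.quantize(
        ov_config=OVConfig(quantization_config=quantization_config),
        calibration_dataset=calibration_dataset,
        save_directory=int8_model_path,
    )

The same hybrid path is still reachable internally: as the modeling_diffusion.py diff below shows, from_pretrained delegates to OVQuantizer when it receives a quantization config carrying a dataset.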
14 changes: 10 additions & 4 deletions notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb
@@ -52,7 +52,8 @@
 "import transformers\n",
 "from pathlib import Path\n",
 "from openvino.runtime import Core\n",
-"from optimum.intel import OVStableDiffusionPipeline, OVWeightQuantizationConfig\n",
+"from optimum.intel import OVConfig, OVQuantizer, OVStableDiffusionPipeline, OVWeightQuantizationConfig\n",
+"from optimum.intel.openvino.configuration import OVQuantizationMethod\n",
 "\n",
 "transformers.logging.set_verbosity_error()\n",
 "datasets.logging.set_verbosity_error()"
@@ -198,9 +199,14 @@
 },
 "outputs": [],
 "source": [
-"quantization_config = OVWeightQuantizationConfig(bits=8, dataset=calibration_dataset, num_samples=NUM_SAMPLES)\n",
-"int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_ID, export=True, quantization_config=quantization_config)\n",
-"int8_pipe.save_pretrained(int8_model_path)"
+"int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_ID, export=True)\n",
+"quantization_config = OVWeightQuantizationConfig(bits=8, num_samples=NUM_SAMPLES, quant_method=OVQuantizationMethod.HYBRID)\n",
+"quantizer = OVQuantizer(int8_pipe)\n",
+"quantizer.quantize(\n",
+"    ov_config=OVConfig(quantization_config=quantization_config),\n",
+"    calibration_dataset=calibration_dataset,\n",
+"    save_directory=int8_model_path\n",
+")"
 ]
 },
 {
1 change: 1 addition & 0 deletions optimum/intel/openvino/configuration.py
@@ -57,6 +57,7 @@
 
 class OVQuantizationMethod(str, Enum):
     DEFAULT = "default"
+    HYBRID = "hybrid"
 
 
 @dataclass
97 changes: 17 additions & 80 deletions optimum/intel/openvino/modeling_diffusion.py
@@ -57,14 +57,13 @@
 )
 
 from ...exporters.openvino import main_export
-from .configuration import OVConfig, OVWeightQuantizationConfig
+from .configuration import OVConfig, OVQuantizationMethod, OVWeightQuantizationConfig
 from .loaders import OVTextualInversionLoaderMixin
 from .modeling_base import OVBaseModel
 from .utils import (
     ONNX_WEIGHTS_NAME,
     OV_TO_NP_TYPE,
     OV_XML_FILE_NAME,
-    PREDEFINED_SD_DATASETS,
     _print_compiled_model_properties,
 )
 
@@ -293,35 +292,27 @@ def _from_pretrained(
                 else:
                     kwargs[name] = load_method(new_model_save_dir)
 
-        quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
-
         unet_path = new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name
-        if quantization_config is not None and quantization_config.dataset is not None:
-            # load the UNet model uncompressed to apply hybrid quantization further
-            unet = cls.load_model(unet_path)
-            # Apply weights compression to other `components` without dataset
-            weight_quantization_params = {
-                param: value for param, value in quantization_config.__dict__.items() if param != "dataset"
-            }
-            weight_quantization_config = OVWeightQuantizationConfig.from_dict(weight_quantization_params)
-        else:
-            weight_quantization_config = quantization_config
-            unet = cls.load_model(unet_path, weight_quantization_config)
-
         components = {
             "vae_encoder": new_model_save_dir / DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER / vae_encoder_file_name,
             "vae_decoder": new_model_save_dir / DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER / vae_decoder_file_name,
             "text_encoder": new_model_save_dir / DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER / text_encoder_file_name,
             "text_encoder_2": new_model_save_dir / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER / text_encoder_2_file_name,
         }
 
-        for key, value in components.items():
-            components[key] = cls.load_model(value, weight_quantization_config) if value.is_file() else None
-
         if model_save_dir is None:
             model_save_dir = new_model_save_dir
 
-        if quantization_config is not None and quantization_config.dataset is not None:
+        quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
+        if quantization_config is None or quantization_config.dataset is None:
+            unet = cls.load_model(unet_path, quantization_config)
+            for key, value in components.items():
+                components[key] = cls.load_model(value, quantization_config) if value.is_file() else None
+        else:
+            # Load uncompressed models to apply hybrid quantization further
+            unet = cls.load_model(unet_path)
+            for key, value in components.items():
+                components[key] = cls.load_model(value) if value.is_file() else None
             sd_model = cls(unet=unet, config=config, model_save_dir=model_save_dir, **components, **kwargs)
 
             supported_pipelines = (
@@ -332,12 +323,14 @@
             if not isinstance(sd_model, supported_pipelines):
                 raise NotImplementedError(f"Quantization in hybrid mode is not supported for {cls.__name__}")
 
-            nsamples = quantization_config.num_samples if quantization_config.num_samples else 200
-            unet_inputs = sd_model._prepare_unet_inputs(quantization_config.dataset, nsamples)
+            from optimum.intel import OVQuantizer
 
-            from .quantization import _hybrid_quantization
+            hybrid_quantization_config = deepcopy(quantization_config)
+            hybrid_quantization_config.quant_method = OVQuantizationMethod.HYBRID
+            quantizer = OVQuantizer(sd_model)
+            quantizer.quantize(ov_config=OVConfig(quantization_config=hybrid_quantization_config))
 
-            unet = _hybrid_quantization(sd_model.unet.model, weight_quantization_config, dataset=unet_inputs)
+            return sd_model
 
         return cls(
             unet=unet,
@@ -348,62 +341,6 @@
             **kwargs,
         )
 
-    def _prepare_unet_inputs(
-        self,
-        dataset: Union[str, List[Any]],
-        num_samples: int,
-        height: Optional[int] = None,
-        width: Optional[int] = None,
-        seed: Optional[int] = 42,
-        **kwargs,
-    ) -> Dict[str, Any]:
-        self.compile()
-
-        size = self.unet.config.get("sample_size", 64) * self.vae_scale_factor
-        height = height or min(size, 512)
-        width = width or min(size, 512)
-
-        if isinstance(dataset, str):
-            dataset = deepcopy(dataset)
-            available_datasets = PREDEFINED_SD_DATASETS.keys()
-            if dataset not in available_datasets:
-                raise ValueError(
-                    f"""You have entered a string value for dataset. You can only choose between
-                    {list(available_datasets)}, but the {dataset} was found"""
-                )
-
-            from datasets import load_dataset
-
-            dataset_metadata = PREDEFINED_SD_DATASETS[dataset]
-            dataset = load_dataset(dataset, split=dataset_metadata["split"], streaming=True).shuffle(seed=seed)
-            input_names = dataset_metadata["inputs"]
-            dataset = dataset.select_columns(list(input_names.values()))
-
-            def transform_fn(data_item):
-                return {inp_name: data_item[column] for inp_name, column in input_names.items()}
-
-        else:
-
-            def transform_fn(data_item):
-                return data_item if isinstance(data_item, (list, dict)) else [data_item]
-
-        from .quantization import InferRequestWrapper
-
-        calibration_data = []
-        self.unet.request = InferRequestWrapper(self.unet.request, calibration_data)
-
-        for inputs in dataset:
-            inputs = transform_fn(inputs)
-            if isinstance(inputs, dict):
-                self.__call__(**inputs, height=height, width=width)
-            else:
-                self.__call__(*inputs, height=height, width=width)
-            if len(calibration_data) >= num_samples:
-                break
-
-        self.unet.request = self.unet.request.request
-        return calibration_data[:num_samples]
-
     @classmethod
     def _from_transformers(
         cls,
