Refactor experimental features (#969)
* PTQ:
Rename the PTQ APIs to drop the 'experimental' suffix.
Remove the new_experimental_exporter flag from the APIs.
* GPTQ:
Remove the new_experimental_exporter flag from the APIs.
Replace GradientPTQConfig with GradientPTQConfigV2, with 'V2' dropped from the class name.
* Tutorials:
Remove mentions of 'experimental' (including some kpi_data method fixes that were missing from a previous PR).
Note: all output quantized models are now wrapped using the MCTQ infrastructure, so some tests had to be adapted to the new quantized model format.

---------

Co-authored-by: reuvenp <reuvenp@altair-semi.com>
reuvenperetz and reuvenp committed Mar 5, 2024
1 parent 32415f1 commit 36f6262
Showing 74 changed files with 729 additions and 859 deletions.
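For orientation, here is a minimal usage sketch of the GPTQ facade after this refactor, based on the signatures shown in the diff below; the model and the random calibration data are placeholders, not part of the commit:

```python
import numpy as np
import tensorflow as tf
import model_compression_toolkit as mct

model = tf.keras.applications.MobileNetV2()  # placeholder trained model

def representative_data_gen():
    for _ in range(2):
        yield [np.random.rand(1, 224, 224, 3).astype(np.float32)]  # placeholder batches

# The config is now built from n_epochs (passes over the representative
# dataset); the old n_iter argument and the GradientPTQConfigV2 name are gone.
gptq_config = mct.gptq.get_keras_gptq_config(n_epochs=5)

# No new_experimental_exporter flag anymore: the returned model is always
# wrapped with quantization information (MCTQ infrastructure).
quantized_model, quantization_info = mct.gptq.keras_gradient_post_training_quantization(
    model,
    representative_data_gen,
    gptq_config=gptq_config)
```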
2 changes: 1 addition & 1 deletion model_compression_toolkit/gptq/__init__.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================

-from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, RoundingType, GradientPTQConfigV2, GPTQHessianScoresConfig
+from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, RoundingType, GPTQHessianScoresConfig
from model_compression_toolkit.gptq.keras.quantization_facade import keras_gradient_post_training_quantization
from model_compression_toolkit.gptq.keras.quantization_facade import get_keras_gptq_config
from model_compression_toolkit.gptq.pytorch.quantization_facade import pytorch_gradient_post_training_quantization
77 changes: 5 additions & 72 deletions model_compression_toolkit/gptq/common/gptq_config.py
@@ -61,8 +61,8 @@ class GradientPTQConfig:
"""
Configuration to use for quantization with GradientPTQ.
"""

-def __init__(self, n_iter: int,
+def __init__(self,
+n_epochs: int,
optimizer: Any,
optimizer_rest: Any = None,
loss: Callable = None,
@@ -79,7 +79,7 @@ def __init__(self, n_iter: int,
Initialize a GradientPTQConfig.
Args:
-n_iter (int): Number of iterations to train.
+n_epochs (int): Number of representative dataset epochs to train.
optimizer (Any): Optimizer to use.
optimizer_rest (Any): Optimizer to use for bias and quantizer parameters.
loss (Callable): The loss to use. should accept 6 lists of tensors. 1st list of quantized tensors, the 2nd list is the float tensors,
@@ -96,7 +96,8 @@ def __init__(self, n_iter: int,
gptq_quantizer_params_override (dict): A dictionary of parameters to override in GPTQ quantizer instantiation. Defaults to None (no parameters).
"""
-self.n_iter = n_iter

+self.n_epochs = n_epochs
self.optimizer = optimizer
self.optimizer_rest = optimizer_rest
self.loss = loss
@@ -114,71 +115,3 @@ def __init__(self, n_iter: int,
else gptq_quantizer_params_override


-class GradientPTQConfigV2(GradientPTQConfig):
-"""
-Configuration to use for quantization with GradientPTQV2.
-"""
-def __init__(self, n_epochs: int,
-optimizer: Any,
-optimizer_rest: Any = None,
-loss: Callable = None,
-log_function: Callable = None,
-train_bias: bool = True,
-rounding_type: RoundingType = RoundingType.SoftQuantizer,
-use_hessian_based_weights: bool = True,
-optimizer_quantization_parameter: Any = None,
-optimizer_bias: Any = None,
-regularization_factor: float = REG_DEFAULT,
-hessian_weights_config: GPTQHessianScoresConfig = GPTQHessianScoresConfig(),
-gptq_quantizer_params_override: Dict[str, Any] = None):
-"""
-Initialize a GradientPTQConfigV2.
-Args:
-n_epochs (int): Number of representative dataset epochs to train.
-optimizer (Any): Optimizer to use.
-optimizer_rest (Any): Optimizer to use for bias and quantizer parameters.
-loss (Callable): The loss to use. should accept 6 lists of tensors. 1st list of quantized tensors, the 2nd list is the float tensors,
-the 3rd is a list of quantized weights, the 4th is a list of float weights, the 5th and 6th lists are the mean and std of the tensors
-accordingly. see example in multiple_tensors_mse_loss
-log_function (Callable): Function to log information about the GPTQ process.
-train_bias (bool): Whether to update the bias during the training or not.
-rounding_type (RoundingType): An enum that defines the rounding type.
-use_hessian_based_weights (bool): Whether to use Hessian-based weights for weighted average loss.
-optimizer_quantization_parameter (Any): Optimizer to override the rest optimizer for quantizer parameters.
-optimizer_bias (Any): Optimizer to override the rest optimizerfor bias.
-regularization_factor (float): A floating point number that defines the regularization factor.
-hessian_weights_config (GPTQHessianScoresConfig): A configuration that include all necessary arguments to run a computation of Hessian scores for the GPTQ loss.
-gptq_quantizer_params_override (dict): A dictionary of parameters to override in GPTQ quantizer instantiation. Defaults to None (no parameters).
-"""
-
-super().__init__(n_iter=None,
-optimizer=optimizer,
-optimizer_rest=optimizer_rest,
-loss=loss,
-log_function=log_function,
-train_bias=train_bias,
-rounding_type=rounding_type,
-use_hessian_based_weights=use_hessian_based_weights,
-optimizer_quantization_parameter=optimizer_quantization_parameter,
-optimizer_bias=optimizer_bias,
-regularization_factor=regularization_factor,
-hessian_weights_config=hessian_weights_config,
-gptq_quantizer_params_override=gptq_quantizer_params_override)
-self.n_epochs = n_epochs
-
-@classmethod
-def from_v1(cls, n_ptq_iter: int, config_v1: GradientPTQConfig):
-"""
-Initialize a GradientPTQConfigV2 from GradientPTQConfig instance.
-Args:
-n_ptq_iter (int): Number of PTQ calibration iters (length of representative dataset).
-config_v1 (GradientPTQConfig): A GPTQ config to convert to V2.
-"""
-n_epochs = int(round(config_v1.n_iter) / n_ptq_iter)
-v1_params = config_v1.__dict__
-v1_params = {k: v for k, v in v1_params.items() if k != 'n_iter'}
-return cls(n_epochs, **v1_params)
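The removed from_v1 helper converted a V1 config's total iteration count into epochs by dividing by the representative-dataset length. Here is a standalone sketch of that arithmetic; note the deleted line rounded n_iter before dividing (int(round(config_v1.n_iter) / n_ptq_iter)), which reads like a parenthesization slip, so this sketch rounds the quotient instead:

```python
def iters_to_epochs(n_iter: int, n_ptq_iter: int) -> int:
    """Convert total GPTQ training iterations into representative-dataset epochs.

    n_ptq_iter is the number of batches in one pass over the representative
    dataset, so n_iter optimization steps amount to n_iter / n_ptq_iter epochs.
    """
    return int(round(n_iter / n_ptq_iter))

# e.g., 5000 iterations over a 500-batch representative dataset -> 10 epochs
assert iters_to_epochs(5000, 500) == 10
```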
4 changes: 2 additions & 2 deletions model_compression_toolkit/gptq/keras/gptq_training.py
@@ -37,7 +37,7 @@
from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
from model_compression_toolkit.core import common
from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer
-from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfigV2
+from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
from model_compression_toolkit.core.common import Graph
from model_compression_toolkit.gptq.keras.graph_info import get_weights_for_loss, get_gptq_trainable_parameters
from model_compression_toolkit.gptq.keras.quantizer.regularization_factory import get_regularization
@@ -56,7 +56,7 @@ class KerasGPTQTrainer(GPTQTrainer):
def __init__(self,
graph_float: Graph,
graph_quant: Graph,
-gptq_config: GradientPTQConfigV2,
+gptq_config: GradientPTQConfig,
fw_impl: FrameworkImplementation,
fw_info: FrameworkInfo,
representative_data_gen: Callable,
44 changes: 15 additions & 29 deletions model_compression_toolkit/gptq/keras/quantization_facade.py
@@ -21,7 +21,7 @@
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.constants import TENSORFLOW, FOUND_TF
from model_compression_toolkit.core.common.user_info import UserInformation
-from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfigV2
+from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig
@@ -66,7 +66,7 @@ def get_keras_gptq_config(n_epochs: int,
loss: Callable = GPTQMultipleTensorsLoss(),
log_function: Callable = None,
use_hessian_based_weights: bool = True,
-regularization_factor: float = REG_DEFAULT) -> GradientPTQConfigV2:
+regularization_factor: float = REG_DEFAULT) -> GradientPTQConfig:
"""
Create a GradientPTQConfig instance for Keras models.
@@ -102,26 +102,25 @@
"""
bias_optimizer = tf.keras.optimizers.SGD(learning_rate=LR_BIAS_DEFAULT,
momentum=GPTQ_MOMENTUM)
-return GradientPTQConfigV2(n_epochs,
-optimizer,
-optimizer_rest=optimizer_rest,
-loss=loss,
-log_function=log_function,
-train_bias=True,
-optimizer_bias=bias_optimizer,
-use_hessian_based_weights=use_hessian_based_weights,
-regularization_factor=regularization_factor)
+return GradientPTQConfig(n_epochs,
+optimizer,
+optimizer_rest=optimizer_rest,
+loss=loss,
+log_function=log_function,
+train_bias=True,
+optimizer_bias=bias_optimizer,
+use_hessian_based_weights=use_hessian_based_weights,
+regularization_factor=regularization_factor)


def keras_gradient_post_training_quantization(in_model: Model,
representative_data_gen: Callable,
-gptq_config: GradientPTQConfigV2,
+gptq_config: GradientPTQConfig,
gptq_representative_data_gen: Callable = None,
target_kpi: KPI = None,
core_config: CoreConfig = CoreConfig(),
fw_info: FrameworkInfo = DEFAULT_KERAS_INFO,
-target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC,
-new_experimental_exporter: bool = True) -> Tuple[Model, UserInformation]:
+target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC) -> Tuple[Model, UserInformation]:
"""
Quantize a trained Keras model using post-training quantization. The model is quantized using a
symmetric constraint quantization thresholds (power of two).
@@ -141,13 +140,12 @@ def keras_gradient_post_training_quantization(in_model: Model,
Args:
in_model (Model): Keras model to quantize.
representative_data_gen (Callable): Dataset used for calibration.
-gptq_config (GradientPTQConfigV2): Configuration for using gptq (e.g. optimizer).
+gptq_config (GradientPTQConfig): Configuration for using gptq (e.g. optimizer).
gptq_representative_data_gen (Callable): Dataset used for GPTQ training. If None defaults to representative_data_gen
target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired.
core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.
fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). `Default Keras info <https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/core/keras/default_framework_info.py>`_
target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Keras model according to.
-new_experimental_exporter (bool): Whether to wrap the quantized model using quantization information or not. Enabled by default. Experimental and subject to future changes.
Returns:
@@ -232,19 +230,7 @@ def keras_gradient_post_training_quantization(in_model: Model,
if core_config.debug_config.analyze_similarity:
analyzer_model_quantization(representative_data_gen, tb_w, tg_gptq, fw_impl, fw_info)

-if new_experimental_exporter:
-Logger.warning('Using new experimental wrapped and ready for export models. To '
-'disable it, please set new_experimental_exporter to False when '
-'calling keras_gradient_post_training_quantization. '
-'If you encounter an issue please file a bug.')
-
-return get_exportable_keras_model(tg_gptq)
-
-return export_model(tg_gptq,
-fw_info,
-fw_impl,
-tb_w,
-bit_widths_config)
+return get_exportable_keras_model(tg_gptq)

else:
# If tensorflow is not installed,
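Beyond the defaults, the facade above still exposes the optimizer and tuning knobs from its signature. A hedged sketch of a customized config (the Adam learning rate is illustrative, not a value taken from this commit):

```python
import tensorflow as tf
import model_compression_toolkit as mct

# Override the default optimizer and keep Hessian-based loss weighting.
gptq_config = mct.gptq.get_keras_gptq_config(
    n_epochs=15,
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    use_hessian_based_weights=True)
```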
@@ -14,7 +14,7 @@
# ==============================================================================
from typing import Dict, List, Tuple

-from model_compression_toolkit.gptq import GradientPTQConfigV2
+from model_compression_toolkit.gptq import GradientPTQConfig
from model_compression_toolkit.core import common
from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
from model_compression_toolkit.exporter.model_wrapper.keras.builder.node_to_quantizer import \
@@ -33,15 +33,15 @@


def quantization_builder(n: common.BaseNode,
-gptq_config: GradientPTQConfigV2
+gptq_config: GradientPTQConfig
) -> Tuple[Dict[str, BaseKerasGPTQTrainableQuantizer], List[BaseKerasInferableQuantizer]]:
"""
Build quantizers for a node according to its quantization configuration and
a global NoOpQuantizeConfig object.
Args:
n: Node to build its QuantizeConfig.
-gptq_config (GradientPTQConfigV2): GradientPTQConfigV2 configuration.
+gptq_config (GradientPTQConfig): GradientPTQConfig configuration.
Returns:
A dictionary which maps the weights kernel attribute to a quantizer for GPTQ training.
@@ -14,7 +14,7 @@
# ==============================================================================
from typing import Callable

-from model_compression_toolkit.gptq import RoundingType, GradientPTQConfigV2, GradientPTQConfig
+from model_compression_toolkit.gptq import RoundingType, GradientPTQConfig
from model_compression_toolkit.gptq.keras.quantizer.soft_rounding.soft_quantizer_reg import \
SoftQuantizerRegularization

@@ -38,8 +38,6 @@ def get_regularization(gptq_config: GradientPTQConfig, representative_data_gen:
for _ in representative_data_gen():
num_batches += 1

-n_epochs = GradientPTQConfigV2.from_v1(n_ptq_iter=num_batches, config_v1=gptq_config).n_epochs if \
-not type(gptq_config) == GradientPTQConfigV2 else gptq_config.n_epochs
-return SoftQuantizerRegularization(total_gradient_steps=num_batches * n_epochs)
+return SoftQuantizerRegularization(total_gradient_steps=num_batches * gptq_config.n_epochs)
else:
return lambda m, e_reg: 0
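With the from_v1 conversion gone, the step budget handed to SoftQuantizerRegularization reduces to batches-per-epoch times epochs. A minimal sketch mirroring the new return line above:

```python
from typing import Callable

def total_gradient_steps(representative_data_gen: Callable, n_epochs: int) -> int:
    # One gradient step per representative batch, repeated every epoch.
    num_batches = sum(1 for _ in representative_data_gen())
    return num_batches * n_epochs
```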
4 changes: 2 additions & 2 deletions model_compression_toolkit/gptq/pytorch/gptq_training.py
@@ -25,7 +25,7 @@
from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer
-from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfigV2
+from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
from model_compression_toolkit.core.common import Graph, BaseNode
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
@@ -46,7 +46,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
def __init__(self,
graph_float: Graph,
graph_quant: Graph,
-gptq_config: GradientPTQConfigV2,
+gptq_config: GradientPTQConfig,
fw_impl: FrameworkImplementation,
fw_info: FrameworkInfo,
representative_data_gen: Callable,
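The PyTorch trainer now takes the same renamed GradientPTQConfig. For symmetry, a usage sketch of the PyTorch facade; get_pytorch_gptq_config is assumed to mirror the Keras helper (its import is truncated from this view, so treat the call as an assumption):

```python
import torch
import model_compression_toolkit as mct
from torchvision.models import mobilenet_v2

model = mobilenet_v2()  # placeholder float model

def representative_data_gen():
    for _ in range(2):
        yield [torch.randn(1, 3, 224, 224)]  # placeholder calibration batches

# Assumed PyTorch counterpart of get_keras_gptq_config.
gptq_config = mct.gptq.get_pytorch_gptq_config(n_epochs=5)

quantized_model, quantization_info = mct.gptq.pytorch_gradient_post_training_quantization(
    model,
    representative_data_gen,
    gptq_config=gptq_config)
```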