Commit f1c9d6f: fix style

echarlaix committed Feb 15, 2024
1 parent 70468a6

Showing 3 changed files with 8 additions and 7 deletions.
1 change: 0 additions & 1 deletion optimum/intel/openvino/modeling_decoder.py
@@ -242,7 +242,6 @@ def _from_transformers(
quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None,
**kwargs,
):

if config.model_type.replace("_", "-") not in _SUPPORTED_ARCHITECTURES:
logger.warning(
f"This architecture : {config.model_type} was not validated, only :{', '.join(_SUPPORTED_ARCHITECTURES)} architectures were "
6 changes: 2 additions & 4 deletions optimum/intel/openvino/quantization.py
@@ -588,9 +588,8 @@ def _int4_weight_only_quantization(
if model.export_feature != "text-generation":
raise ValueError("Only `OVModelForCausalLM` are supported for now")


quantization_config = quantization_config or _check_default_4bit_configs(model.config)

# Data-free weight-only quantization to asymmetric INT4
if quantization_config is None:
quantization_config = OVWeightQuantizationConfig(bits=4, sym=False)
@@ -632,7 +631,7 @@ def _weight_only_quantization(model: OVBaseModel, quantization_config: Union[OVW
mode = CompressWeightsMode.INT8_SYM if config.sym else CompressWeightsMode.INT8_ASYM
else:
mode = CompressWeightsMode.INT4_SYM if config.sym else CompressWeightsMode.INT4_ASYM

model.model = nncf.compress_weights(
ov_model,
mode=mode,
@@ -644,4 +643,3 @@ def _weight_only_quantization(model: OVBaseModel, quantization_config: Union[OVW
ignored_scope=ignored_scope,
dataset=dataset,
)

8 changes: 6 additions & 2 deletions tests/openvino/test_quantization.py
@@ -349,7 +349,9 @@ def test_ovmodel_load_with_compressed_weights(self, model_cls, model_type):
def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_int8, expected_ov_int4):
with tempfile.TemporaryDirectory() as tmp_dir:
model_id = MODEL_NAMES[model_type]
model = model_cls.from_pretrained(model_id, export=True, quantization_config=OVWeightQuantizationConfig(bits=4))
model = model_cls.from_pretrained(
model_id, export=True, quantization_config=OVWeightQuantizationConfig(bits=4)
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
@@ -404,7 +406,9 @@ def transform_fn(data, tokenizer):
model = model_cls.from_pretrained(
model_id,
export=True,
quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8, dataset=quantization_dataset),
quantization_config=OVWeightQuantizationConfig(
bits=4, sym=True, group_size=-1, ratio=0.8, dataset=quantization_dataset
),
)

_, num_int8, num_int4 = get_num_quantized_nodes(model)
