optimum/intel/openvino/modeling_base.py (+4 −4)

```diff
@@ -698,11 +698,11 @@ def _export(
             )
         compile_only = False

-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
+        ov_config = OVConfig(dtype="auto")
+        if load_in_8bit is None and quantization_config is None:
+            # If load_in_8bit and quantization_config are not specified then ov_config is set to None, and
+            # models larger than 1B parameters will be quantized to int8
```
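The substantive change is the condition: `not quantization_config` treats any falsy value (for example an empty config) as "not specified", while `quantization_config is None` fires only when the argument was genuinely omitted. A minimal, self-contained sketch of the two behaviours, not library code; plain strings stand in for the real `OVConfig` objects:

```python
# Sketch (not the library's code): why `is None` differs from `not ...`.
def pick_old(load_in_8bit, quantization_config):
    # Old check: truthiness. A falsy-but-present config (e.g. {}) counts as absent.
    if load_in_8bit is None and not quantization_config:
        return None
    return "OVConfig(dtype='auto')"

def pick_new(load_in_8bit, quantization_config):
    # New check: identity. Only a genuinely omitted config selects the
    # size-based default (int8 weights for models larger than 1B parameters).
    ov_config = "OVConfig(dtype='auto')"
    if load_in_8bit is None and quantization_config is None:
        ov_config = None
    return ov_config

print(pick_old(None, {}))  # None -> empty config was silently treated as absent
print(pick_new(None, {}))  # OVConfig(dtype='auto') -> empty config counts as provided
```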
optimum/intel/openvino/modeling_decoder.py (+6 −6)

```diff
@@ -323,11 +323,11 @@ def _export(
         if use_cache:
             task = task + "-with-past"

-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_export_config = None
-        else:
-            ov_export_config = OVConfig(dtype="auto")
+        ov_config = OVConfig(dtype="auto")
+        if load_in_8bit is None and quantization_config is None:
+            # If load_in_8bit and quantization_config are not specified then ov_config is set to None, and
+            # models larger than 1B parameters will be quantized to int8
```

Two further hunks follow in the captured diff; their file header is missing from the capture. They apply the same replacement, first around a `fn_get_submodels` helper and then in another `_export` method:

```diff
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
+        ov_config = OVConfig(dtype="auto")
+        if load_in_8bit is None and quantization_config is None:
+            # If load_in_8bit and quantization_config are not specified then ov_config is set to None, and
+            # models larger than 1B parameters will be quantized to int8
             ov_config = None
-        else:
-            ov_config = OVConfig(dtype="fp32")

         def fn_get_submodels(model):
             return {"model_text": model.text}
```

```diff
@@ -370,11 +370,11 @@ def _export(
         # would end-up removing the directory containing the underlying OpenVINO model

-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
+        ov_config = OVConfig(dtype="auto")
+        if load_in_8bit is None and quantization_config is None:
+            # If load_in_8bit and quantization_config are not specified then ov_config is set to None, and
+            # models larger than 1B parameters will be quantized to int8
```
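From the user side, the effect of the new default is easiest to see at load time. A hedged usage sketch, assuming the documented optimum-intel entry points (`OVModelForCausalLM`, `export=True`, `load_in_8bit`); the model ID is only an example:

```python
from optimum.intel import OVModelForCausalLM

# Neither load_in_8bit nor quantization_config given: the library decides from
# model size, so models larger than 1B parameters get int8 weight compression.
model = OVModelForCausalLM.from_pretrained("gpt2", export=True)

# Opting out explicitly keeps full-precision weights regardless of model size.
model_fp = OVModelForCausalLM.from_pretrained("gpt2", export=True, load_in_8bit=False)
```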
optimum/intel/openvino/modeling_seq2seq.py (+4 −4)

```diff
@@ -593,11 +593,11 @@ def _export(
                 "Please provide openvino model obtained using optimum-cli or saved on disk using `save_pretrained`"
             )
         compile_only = False
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
+        ov_config = OVConfig(dtype="auto")
+        if load_in_8bit is None and quantization_config is None:
+            # If load_in_8bit and quantization_config are not specified then ov_config is set to None, and
+            # models larger than 1B parameters will be quantized to int8
```

A second hunk later in the captured diff applies the same replacement:

```diff
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
+        ov_config = OVConfig(dtype="auto")
+        if load_in_8bit is None and quantization_config is None:
+            # If load_in_8bit and quantization_config are not specified then ov_config is set to None, and
+            # models larger than 1B parameters will be quantized to int8
             ov_config = None
-        else:
-            # Export in fp32 if compression won't be applied later
-            ov_config = OVConfig(dtype="fp32")
```
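Because the check is now `is None`, any explicitly passed `quantization_config` counts as "specified" and routes around the size-based default. A hedged sketch, assuming the public `OVWeightQuantizationConfig` and `OVModelForSeq2SeqLM` API; the model ID is only an example:

```python
from optimum.intel import OVModelForSeq2SeqLM, OVWeightQuantizationConfig

# An explicit config bypasses the >1B-parameter heuristic entirely:
# quantization is applied exactly as requested, regardless of model size.
q = OVWeightQuantizationConfig(bits=8)
model = OVModelForSeq2SeqLM.from_pretrained("t5-small", export=True, quantization_config=q)
```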