Skip to content

Commit 9474efb

Browse files
Update number of expected exported tokenizers (#940)
* update number of expected exported tokenizers
* tokenizer version
* warning
* fix last test
1 parent 7702d35 commit 9474efb

File tree

3 files changed

+33
-5
lines changed

3 files changed

+33
-5
lines changed

optimum/exporters/openvino/convert.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
_torch_version,
4242
_transformers_version,
4343
compare_versions,
44+
is_tokenizers_version,
4445
is_transformers_version,
4546
)
4647
from optimum.utils import DEFAULT_DUMMY_SHAPES, is_diffusers_available
@@ -730,6 +731,12 @@ def export_tokenizer(
730731
except ModuleNotFoundError:
731732
return
732733

734+
if is_tokenizers_version(">", "0.19"):
735+
logger.warning(
736+
"Exporting tokenizers to OpenVINO is not supported for tokenizers version > 0.19. "
737+
"Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO."
738+
)
739+
733740
if not isinstance(output, Path):
734741
output = Path(output)
735742

optimum/intel/utils/import_utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,13 @@
4343
except importlib_metadata.PackageNotFoundError:
4444
_transformers_available = False
4545

46+
# Detect the `tokenizers` package and record its installed version.
# `find_spec` only tells us a module named `tokenizers` can be located; the
# version string comes from the distribution metadata. If that metadata is
# missing, the package is treated as unavailable and the version stays "N/A".
_tokenizers_available = importlib.util.find_spec("tokenizers") is not None
_tokenizers_version = "N/A"
if _tokenizers_available:
    try:
        _tokenizers_version = importlib_metadata.version("tokenizers")
    except importlib_metadata.PackageNotFoundError:
        _tokenizers_available = False
4653

4754
_torch_available = importlib.util.find_spec("torch") is not None
4855
_torch_version = "N/A"
@@ -181,6 +188,10 @@ def is_transformers_available():
181188
return _transformers_available
182189

183190

191+
def is_tokenizers_available():
    """
    Return the value of the `_tokenizers_available` flag.
    """
    return _tokenizers_available
193+
194+
184195
def is_neural_compressor_available():
    """
    Return the value of the `_neural_compressor_available` flag.
    """
    return _neural_compressor_available
186197

@@ -340,6 +351,15 @@ def is_transformers_version(operation: str, version: str):
340351
return compare_versions(parse(_transformers_version), operation, version)
341352

342353

354+
def is_tokenizers_version(operation: str, version: str):
    """
    Compare the current Tokenizers version to a given reference with an operation.

    Args:
        operation (`str`):
            Comparison operator forwarded to `compare_versions`, e.g. `">"` or `"<"`.
        version (`str`):
            Reference version to compare against, e.g. `"0.19"`.

    Returns:
        `False` when Tokenizers is not available; otherwise the result of `compare_versions`.
    """
    if not _tokenizers_available:
        # No installed version to compare: `_tokenizers_version` is still the "N/A" placeholder.
        return False
    return compare_versions(parse(_tokenizers_version), operation, version)
361+
362+
343363
def is_optimum_version(operation: str, version: str):
    """
    Compare the version stored in `_optimum_version` to a given reference with an operation.

    Both arguments are forwarded unchanged to `compare_versions`.
    """
    return compare_versions(parse(_optimum_version), operation, version)
345365

tests/openvino/test_exporters_cli.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from optimum.intel.utils.import_utils import (
4949
compare_versions,
5050
is_openvino_tokenizers_available,
51+
is_tokenizers_version,
5152
)
5253

5354

@@ -73,17 +74,17 @@ class OVCLIExportTestCase(unittest.TestCase):
7374
("image-to-image", "stable-diffusion-xl-refiner"),
7475
)
7576
EXPECTED_NUMBER_OF_TOKENIZER_MODELS = {
76-
"gpt2": 2,
77+
"gpt2": 2 if is_tokenizers_version("<", "0.20") else 0,
7778
"t5": 0, # no .model file in the repository
7879
"albert": 0, # not supported yet
7980
"distilbert": 1, # no detokenizer
80-
"roberta": 2,
81+
"roberta": 2 if is_tokenizers_version("<", "0.20") else 0,
8182
"vit": 0, # no tokenizer for image model
8283
"wav2vec2": 0, # no tokenizer
8384
"bert": 1, # no detokenizer
84-
"blenderbot": 2,
85-
"stable-diffusion": 2,
86-
"stable-diffusion-xl": 4,
85+
"blenderbot": 2 if is_tokenizers_version("<", "0.20") else 0,
86+
"stable-diffusion": 2 if is_tokenizers_version("<", "0.20") else 0,
87+
"stable-diffusion-xl": 4 if is_tokenizers_version("<", "0.20") else 0,
8788
}
8889

8990
SUPPORTED_SD_HYBRID_ARCHITECTURES = (

0 commit comments

Comments
 (0)