Commit feba5bf

Convert tokenizers with openvino_tokenizers
slyalin committed Jan 5, 2024
1 parent 19fef90 commit feba5bf
Showing 1 changed file with 27 additions and 0 deletions.
optimum/exporters/openvino/__main__.py (27 additions, 0 deletions)
@@ -19,6 +19,7 @@

from requests.exceptions import ConnectionError as RequestsConnectionError
from transformers import AutoConfig, AutoTokenizer
from openvino import save_model

from optimum.exporters import TasksManager
from optimum.exporters.onnx import __main__ as optimum_main
@@ -46,6 +47,24 @@
logger = logging.getLogger(__name__)


def tokenizer_export(
    tokenizer,
    output: Union[str, Path],
    suffix: Optional[str] = ""
):
    try:
        from openvino_tokenizers import convert_tokenizer

        ov_tokenizer, ov_detokenizer = convert_tokenizer(tokenizer, with_detokenizer=True)
        if isinstance(output, str):
            output = Path(output)
        tokenizer_path = output.joinpath("openvino_tokenizer" + suffix + ".xml")
        detokenizer_path = output.joinpath("openvino_detokenizer" + suffix + ".xml")
        save_model(ov_tokenizer, tokenizer_path)
        save_model(ov_detokenizer, detokenizer_path)
    except Exception as exception:
        print("[ WARNING ] OpenVINO tokenizer/detokenizer models couldn't be exported because of exception:", exception)


def main_export(
model_name_or_path: str,
output: Union[str, Path],
@@ -328,6 +347,12 @@ class StoreAttr(object):
    if generation_config is not None:
        generation_config.save_pretrained(output)
    maybe_save_preprocessors(model_name_or_path, output)
    try:
        # Avoid loading the tokenizer a second time if it was already loaded before
        tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
        tokenizer_export(tokenizer, output)
    except Exception:
        print("[ WARNING ] Could not load tokenizer using specified model ID or path. OpenVINO tokenizer/detokenizer models won't be generated.")

    if model.config.is_encoder_decoder and task.startswith("text-generation"):
        raise ValueError(
@@ -358,10 +383,12 @@ class StoreAttr(object):
        tokenizer = getattr(model, "tokenizer", None)
        if tokenizer is not None:
            tokenizer.save_pretrained(output.joinpath("tokenizer"))
            tokenizer_export(tokenizer, output)

        tokenizer_2 = getattr(model, "tokenizer_2", None)
        if tokenizer_2 is not None:
            tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))
            tokenizer_export(tokenizer_2, output, "_2")

        model.save_config(output)


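For context, here is a minimal standalone sketch of the conversion flow that the new tokenizer_export helper wraps. It is not part of the commit; the model ID "gpt2" and the in-place output file names are illustrative assumptions.

# Sketch of the flow wrapped by tokenizer_export (assumed model ID and paths).
from openvino import compile_model, save_model
from openvino_tokenizers import convert_tokenizer
from transformers import AutoTokenizer

hf_tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Convert the Hugging Face tokenizer into a pair of OpenVINO models.
ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)

# Serialize them as OpenVINO IR, matching the file names written by tokenizer_export.
save_model(ov_tokenizer, "openvino_tokenizer.xml")
save_model(ov_detokenizer, "openvino_detokenizer.xml")

# The converted tokenizer accepts raw strings and runs on the OpenVINO runtime.
compiled_tokenizer = compile_model(ov_tokenizer)
encoded = compiled_tokenizer(["Hello, world!"])  # typically yields input_ids / attention_mask outputs

The openvino_tokenizer.xml and openvino_detokenizer.xml files that main_export now writes next to the exported model can be loaded the same way by passing their paths to compile_model.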