Commit ce4aa07

Merge branch 'main' into optimize-ci

IlyasMoutawwakil committed Nov 3, 2024
2 parents 47b5ad3 + 74d2161

Showing 20 changed files with 108 additions and 46 deletions.

2 changes: 1 addition & 1 deletion .github/workflows/test_generation.yml

@@ -18,7 +18,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9"]
+        python-version: ["3.9", "3.12"]
         os: [ubuntu-latest]

     runs-on: ${{ matrix.os }}

2 changes: 1 addition & 1 deletion .github/workflows/test_inc.yml

@@ -19,7 +19,7 @@ jobs:
       fail-fast: false
       matrix:
         torch-version: ["2.2.0", "2.3.*", "2.4.*"]
-        python-version: ["3.9"]
+        python-version: ["3.9", "3.11"]
         os: [ubuntu-latest]

     runs-on: ${{ matrix.os }}

21 changes: 12 additions & 9 deletions .github/workflows/test_openvino.yml

@@ -26,7 +26,7 @@ jobs:
           "*training*",
           "*export*",
         ]
-        transformers-version: ["4.36.0", "4.45.*"]
+        transformers-version: ["4.36.0", "latest"]
         python-version: ["3.9"]
         os: [ubuntu-latest]

@@ -41,19 +41,23 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}

+      - name: Install lowest compatible transformers version
+        if: ${{ matrix.transformers-version != 'latest' }}
+        run: pip install transformers==${{ matrix.transformers-version }} accelerate==0.*
+
       - name: Install dependencies
         run: |
           pip install --upgrade pip
           pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-          pip install .[openvino,openvino-tokenizers,diffusers,tests] transformers[testing]==${{ matrix.transformers-version }}
+          pip install .[openvino,openvino-tokenizers,diffusers,tests] transformers[testing]
-      - if: ${{ matrix.transformers-version == '4.36.0' }}
-        name: Downgrade Accelerate
-        run: pip install accelerate==0.*
+      - if: ${{ matrix.transformers-version != 'latest' }}
+        name: Downgrade dependencies
+        run: pip install transformers==${{ matrix.transformers-version }} accelerate==0.*

-      - name: Assert versions
-        run: |
-          python -c "import transformers; print(transformers.__version__); assert transformers.__version__.startswith('${{ matrix.transformers-version }}'.replace('.*', ''))"
+      - if: ${{ matrix.test-pattern == '*modeling*' }}
+        name: Uninstall NNCF
+        run: pip uninstall -y nncf

       - name: Test with Pytest
         run: |

@@ -65,7 +69,6 @@ jobs:
         name: Install dependencies (nightly)
         run: |
           pip install --upgrade --pre openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-          pip uninstall -y nncf
       - if: ${{ matrix.test-pattern == '*modeling*' }}
         name: Test with Pytest (nightly)

2 changes: 1 addition & 1 deletion .github/workflows/test_openvino_examples.yml

@@ -22,7 +22,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9"]
+        python-version: ["3.9", "3.12"]

     runs-on: ubuntu-22.04

2 changes: 1 addition & 1 deletion .github/workflows/test_openvino_notebooks.yml

@@ -22,7 +22,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9"]
+        python-version: ["3.9", "3.12"]

     runs-on: ubuntu-22.04

14 changes: 7 additions & 7 deletions .github/workflows/test_openvino_slow.yml

@@ -28,7 +28,7 @@ jobs:
         # This also ensures that the test fails if dependencies break for Python 3.7
         python-version: ["3.9", "3.12"]
         os: ["ubuntu-22.04", "windows-latest"]
-        transformers-version: ["4.45.*"]
+        transformers-version: ["latest"]
         openvino: ["openvino openvino-tokenizers"]
         nncf: ["nncf"]
         include:

@@ -39,12 +39,12 @@ jobs:
             nncf: "nncf"
           - python-version: "3.12"
             os: "ubuntu-22.04"
-            transformers-version: "4.45.*"
+            transformers-version: "latest"
             openvino: "--pre -U openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly"
             nncf: "nncf"
           - python-version: "3.12"
             os: "ubuntu-22.04"
-            transformers-version: "4.45.*"
+            transformers-version: "latest"
             openvino: "--pre -U openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly"
             nncf: "git+https://github.com/openvinotoolkit/nncf.git"

@@ -64,11 +64,11 @@ jobs:
           pip install --upgrade pip
           pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
           pip install ${{ matrix.openvino }}
-          pip install .[tests] transformers[testing]==${{ matrix.transformers-version }}
+          pip install .[tests] transformers[testing]
-      - name: Assert versions
-        run: |
-          python -c "import transformers; print(transformers.__version__); assert transformers.__version__.startswith('${{ matrix.transformers-version }}'.replace('.*', ''))"
+      - if: ${{ matrix.transformers-version != 'latest' }}
+        name: Downgrade dependencies
+        run: pip install transformers==${{ matrix.transformers-version }} accelerate==0.*

       - name: Test with Pytest
         run: |

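Note that the workflow changes above all follow one pattern: the matrix gains a "latest" entry, the install step stops pinning transformers, and a conditional step downgrades afterwards when an older pin is requested. A minimal sketch of the check the removed "Assert versions" steps performed, where the pinned value stands in for the ${{ matrix.transformers-version }} injected by the matrix:

    # Sketch of the removed "Assert versions" step; "4.45.*" is a stand-in
    # for the ${{ matrix.transformers-version }} value from the matrix.
    import transformers

    pinned = "4.45.*"
    prefix = pinned.replace(".*", "")  # wildcard pins compare by version prefix
    print(transformers.__version__)
    assert transformers.__version__.startswith(prefix)
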
1 change: 1 addition & 0 deletions examples/openvino/audio-classification/requirements.txt

@@ -1,3 +1,4 @@
+transformers>=4.36.0,<4.46.0
 datasets>=1.14.0,<2.20.0
 evaluate
 librosa

1 change: 1 addition & 0 deletions examples/openvino/image-classification/requirements.txt

@@ -1,3 +1,4 @@
+transformers>=4.36.0,<4.46.0
 datasets>=1.14.0,<2.20.0
 torch >= 1.9.0
 torchvision>=0.6.0

1 change: 1 addition & 0 deletions examples/openvino/question-answering/requirements.txt

@@ -1,3 +1,4 @@
+transformers>=4.36.0,<4.46.0
 datasets>=1.14.0,<2.20.0
 torch >= 1.9.0
 evaluate

1 change: 1 addition & 0 deletions examples/openvino/text-classification/requirements.txt

@@ -1,3 +1,4 @@
+transformers>=4.36.0,<4.46.0
 datasets>=1.14.0,<2.20.0
 sentencepiece != 0.1.92
 scipy

3 changes: 2 additions & 1 deletion optimum/exporters/openvino/convert.py

@@ -39,6 +39,7 @@
     get_diffusion_models_for_export,
 )
 from optimum.intel.utils.import_utils import (
+    _diffusers_version,
     _nncf_version,
     _open_clip_version,
     _optimum_intel_version,

@@ -806,7 +807,7 @@ def _add_version_info_to_model(model: Model, library_name: Optional[str] = None):

         model.set_rt_info(sentence_transformers.__version__, ["optimum", "sentence_transformers_version"])
     if library_name == "diffusers":
-        model.set_rt_info(_optimum_version, ["optimum", "diffusers_version"])
+        model.set_rt_info(_diffusers_version, ["optimum", "diffusers_version"])
     elif library_name == "timm":
         model.set_rt_info(_timm_version, ["optimum", "timm_version"])
     elif library_name == "open_clip":

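This fix stamps the actual diffusers version into the exported model's runtime info instead of optimum's own version. For context, a sketch of reading that metadata back from an exported IR; the file path is illustrative, and the key path mirrors the one passed to set_rt_info in the diff:

    # Sketch: reading back version metadata written with set_rt_info.
    # "openvino_model.xml" is an illustrative path to an exported model.
    import openvino as ov

    model = ov.Core().read_model("openvino_model.xml")
    if model.has_rt_info(["optimum", "diffusers_version"]):
        print(model.get_rt_info(["optimum", "diffusers_version"]))
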
20 changes: 20 additions & 0 deletions optimum/exporters/openvino/model_patcher.py

@@ -2505,6 +2505,26 @@ def patched_forward(*args, **kwargs):

         self.patched_forward = patched_forward

+    def __enter__(self):
+        super().__enter__()
+        if is_transformers_version(">=", "4.45.0"):
+            from transformers.models.gemma2.modeling_gemma2 import GEMMA2_ATTENTION_CLASSES
+
+            sdpa_attn = GEMMA2_ATTENTION_CLASSES["sdpa"]
+            eager_attn = GEMMA2_ATTENTION_CLASSES["eager"]
+
+            for layer in self._model.model.layers:
+                if isinstance(layer.self_attn, eager_attn):
+                    layer.self_attn._orig_forward = layer.self_attn.forward
+                    layer.self_attn.forward = types.MethodType(sdpa_attn.forward, layer.self_attn)
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        super().__exit__(exc_type, exc_value, traceback)
+        if is_transformers_version(">=", "4.45.0"):
+            for layer in self._model.model.layers:
+                if hasattr(layer.self_attn, "_orig_forward"):
+                    layer.self_attn.forward = layer.self_attn._orig_forward
+


 def _decilm_attn_forward(
     self,

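The new __enter__/__exit__ pair swaps each eager Gemma2 attention module's bound forward for the SDPA implementation during export and restores the original afterwards. A self-contained sketch of that swap-and-restore pattern, using toy classes rather than the real transformers modules:

    # Generic sketch of the swap-and-restore pattern above (toy classes,
    # not the actual transformers attention modules).
    import types


    class EagerAttention:
        def forward(self, x):
            return f"eager({x})"


    class SdpaAttention:
        def forward(self, x):
            return f"sdpa({x})"


    class SwapAttention:
        def __init__(self, layers):
            self.layers = layers

        def __enter__(self):
            for layer in self.layers:
                if isinstance(layer, EagerAttention):
                    # keep the bound original, rebind the replacement to this instance
                    layer._orig_forward = layer.forward
                    layer.forward = types.MethodType(SdpaAttention.forward, layer)
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            for layer in self.layers:
                if hasattr(layer, "_orig_forward"):
                    layer.forward = layer._orig_forward
                    del layer._orig_forward


    layers = [EagerAttention()]
    with SwapAttention(layers):
        assert layers[0].forward("x") == "sdpa(x)"
    assert layers[0].forward("x") == "eager(x)"
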
21 changes: 12 additions & 9 deletions optimum/intel/neural_compressor/trainer.py

@@ -39,6 +39,7 @@
 from transformers import Trainer
 from transformers.data.data_collator import DataCollator
 from transformers.debug_utils import DebugOption, DebugUnderflowOverflow
+from transformers.feature_extraction_utils import FeatureExtractionMixin
 from transformers.modeling_utils import PreTrainedModel, get_parameter_dtype, unwrap_model
 from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
 from transformers.tokenization_utils_base import PreTrainedTokenizerBase

@@ -104,7 +105,7 @@
     from neural_compressor.config import _BaseQuantizationConfig


-__version__ = "4.22.2"
+__version__ = "4.46.0"


 logger = logging.get_logger(__name__)

@@ -122,8 +123,9 @@ def __init__(
         data_collator: Optional[DataCollator] = None,
         train_dataset: Optional[Dataset] = None,
         eval_dataset: Optional[Dataset] = None,
-        tokenizer: Optional[PreTrainedTokenizerBase] = None,
+        processing_class: Optional[Union[PreTrainedTokenizerBase, FeatureExtractionMixin]] = None,
         model_init: Callable[[], PreTrainedModel] = None,
+        compute_loss_func: Optional[Callable] = None,
         compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
         callbacks: Optional[List[TrainerCallback]] = None,
         optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),

@@ -132,6 +134,7 @@ def __init__(
         pruning_config: Optional[_BaseQuantizationConfig] = None,
         distillation_config: Optional[_BaseQuantizationConfig] = None,
         task: Optional[str] = None,
+        **kwargs,
     ):
         self.neftune_noise_alpha = None

@@ -141,12 +144,12 @@ def __init__(
             data_collator,
             train_dataset,
             eval_dataset,
-            tokenizer,
-            model_init,
-            compute_metrics,
-            callbacks,
-            optimizers,
-            preprocess_logits_for_metrics,
+            processing_class or kwargs.get("tokenizer", None),
+            model_init=model_init,
+            compute_metrics=compute_metrics,
+            callbacks=callbacks,
+            optimizers=optimizers,
+            preprocess_logits_for_metrics=preprocess_logits_for_metrics,
         )

         if self.args.device.type == "cuda" and not is_neural_compressor_version(">", "2.0.0"):

@@ -766,7 +769,7 @@ def _get_logits(model_outputs):
         output_names = ["logits", "start_logits", "end_logits"]
         return tuple(model_outputs.get(name) for name in output_names if name in model_outputs)

-    def compute_loss(self, model, inputs, return_outputs=False):
+    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
         """
         How the loss is computed by Trainer. By default, all models return the loss in the first element.
         """

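Background for the trainer changes: transformers 4.46 renames the Trainer's tokenizer argument to processing_class (which may also be a feature extractor or processor) and threads num_items_in_batch into compute_loss. The processing_class or kwargs.get("tokenizer", None) forwarding keeps the legacy keyword working; a simplified stand-alone sketch, where Base plays the role of transformers.Trainer:

    # Simplified stand-in for the tokenizer -> processing_class forwarding;
    # Base plays the role of transformers.Trainer.
    class Base:
        def __init__(self, processing_class=None, **kwargs):
            self.processing_class = processing_class


    class Compat(Base):
        def __init__(self, processing_class=None, **kwargs):
            # accept the legacy keyword, prefer the new one when both are given
            super().__init__(processing_class=processing_class or kwargs.get("tokenizer", None))


    assert Compat(tokenizer="tok").processing_class == "tok"
    assert Compat(processing_class="proc", tokenizer="tok").processing_class == "proc"
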
3 changes: 3 additions & 0 deletions optimum/intel/openvino/modeling_decoder.py

@@ -522,9 +522,12 @@ def forward(
         attention_mask: Optional[torch.LongTensor] = None,
         past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
         position_ids: Optional[torch.LongTensor] = None,
+        token_type_ids: Optional[torch.LongTensor] = None,
         **kwargs,
     ) -> CausalLMOutputWithPast:
         self.compile()
+        # added as model.generate validates model inputs based on forward signature
+        kwargs["token_type_ids"] = token_type_ids

         inputs = self.prepare_inputs(
             input_ids=input_ids,

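The explicit token_type_ids parameter matters because, as the new comment notes, model.generate validates model inputs against the forward signature and drops keys it cannot find there. A sketch of that mechanism in isolation:

    # Sketch of why the explicit parameter matters: generate()-style input
    # validation keeps only keys named in the forward signature (simplified).
    import inspect


    def forward_old(input_ids, attention_mask=None, **kwargs):
        return kwargs


    def forward_new(input_ids, attention_mask=None, token_type_ids=None, **kwargs):
        return {"token_type_ids": token_type_ids, **kwargs}


    def accepted(fn):
        return {
            name
            for name, p in inspect.signature(fn).parameters.items()
            if p.kind is not inspect.Parameter.VAR_KEYWORD
        }


    print("token_type_ids" in accepted(forward_old))  # False -> input would be dropped
    print("token_type_ids" in accepted(forward_new))  # True  -> input survives validation
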
12 changes: 9 additions & 3 deletions optimum/intel/openvino/modeling_diffusion.py

@@ -409,6 +409,8 @@ def _from_pretrained(
             "tokenizer_2": None,
             "tokenizer_3": None,
             "feature_extractor": None,
+            "image_encoder": None,
+            "safety_checker": None,
         }
         for name in submodels.keys():
             if kwargs.get(name) is not None:

@@ -434,6 +436,10 @@ def _from_pretrained(
             "text_encoder_3": model_save_path / DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER / text_encoder_3_file_name,
         }

+        for config_key, value in config.items():
+            if config_key not in models and config_key not in kwargs and config_key not in submodels:
+                kwargs[config_key] = value
+
         compile_only = kwargs.get("compile_only", False)
         quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
         if (quantization_config is None or quantization_config.dataset is None) and not compile_only:

@@ -995,9 +1001,9 @@ def modules(self):
 class OVModelTextEncoder(OVPipelinePart):
     def __init__(self, model: openvino.runtime.Model, parent_pipeline: OVDiffusionPipeline, model_name: str = ""):
         super().__init__(model, parent_pipeline, model_name)
-        self.hidden_states_output_names = sorted(
-            {name for out in self.model.outputs for name in out.names if name.startswith("hidden_states")}
-        )
+        self.hidden_states_output_names = [
+            name for out in self.model.outputs for name in out.names if name.startswith("hidden_states")
+        ]

     def forward(
         self,

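The hidden_states_output_names change likely guards against lexicographic misordering: sorted on string names puts index 10 before index 2, whereas the model's own output order is already correct. A two-line demonstration:

    # Lexicographic sorting misorders numbered output names past index 9:
    names = ["hidden_states.1", "hidden_states.2", "hidden_states.10"]
    print(sorted(names))  # ['hidden_states.1', 'hidden_states.10', 'hidden_states.2']
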
5 changes: 5 additions & 0 deletions optimum/intel/openvino/trainer.py

@@ -216,6 +216,11 @@ def __init__(
         logger.warning("OVTrainer is deprecated and will be removed in optimum-intel v1.22.0.")

         if is_transformers_version(">=", "4.45.0"):
+            if is_transformers_version(">=", "4.46.0"):
+                raise ImportError(
+                    f"The transformers version found is {_transformers_version}, which is not supported by OVTrainer. Please downgrade to v4.44 or lower."
+                )
+
             logger.warning(
                 f"The transformers version found is {_transformers_version} which is not officially supported by the OVTrainer, use at your own risk"
             )

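The guard escalates from the existing warning to a hard error once transformers 4.46 is reached. An equivalent stand-alone sketch using packaging directly; optimum's is_transformers_version helper wraps a comparison along these lines:

    # Stand-alone equivalent of the version gate, using packaging directly.
    from packaging import version

    import transformers

    v = version.parse(transformers.__version__)
    if v >= version.parse("4.46.0"):
        raise ImportError(f"transformers {v} is not supported by OVTrainer; downgrade to v4.44 or lower")
    elif v >= version.parse("4.45.0"):
        print(f"transformers {v} is not officially supported by OVTrainer; use at your own risk")
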
10 changes: 3 additions & 7 deletions setup.py

@@ -28,8 +28,8 @@

 INSTALL_REQUIRE = [
     "torch>=1.11",
-    "transformers>=4.36.0,<4.46.0",
     "optimum~=1.23",
+    "transformers>=4.36,<4.47",
     "datasets>=1.4.0",
     "sentencepiece",
     "setuptools",

@@ -61,13 +61,9 @@
 QUALITY_REQUIRE = ["black~=23.1", "ruff==0.4.4"]

 EXTRAS_REQUIRE = {
-    "neural-compressor": ["neural-compressor[pt]>3.0", "accelerate"],
-    "openvino": [
-        "nncf>=2.11.0",
-        "openvino==2024.4.1.dev20240926",
-        "openvino-tokenizers==2024.4.1.0.dev20240926",
-    ],
     "nncf": ["nncf>=2.11.0"],
+    "openvino": ["nncf>=2.11.0", "openvino==2024.4.1.dev20240926", "openvino-tokenizers==2024.4.1.0.dev20240926"],
+    "neural-compressor": ["neural-compressor[pt]>3.0", "accelerate", "transformers<4.46"],
     "ipex": ["intel-extension-for-pytorch", "transformers>=4.39,<4.45"],
     "diffusers": ["diffusers"],
     "quality": QUALITY_REQUIRE,

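In practical terms, the reorganized extras mean NNCF can be pulled in on its own with pip install optimum-intel[nncf], optimum-intel[openvino] keeps bundling NNCF with the pinned OpenVINO pre-releases, and the neural-compressor extra now caps transformers below 4.46 to match the trainer changes above.
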
17 changes: 13 additions & 4 deletions tests/openvino/test_modeling.py

@@ -864,12 +864,15 @@ def test_compare_to_transformers(self, model_arch):
         if model_arch in self.REMOTE_CODE_MODELS:
             model_kwargs = {"trust_remote_code": True}

+        # starting from transformers 4.45.0, gemma2 uses eager attention by default, while the OpenVINO export uses sdpa
+        if model_arch == "gemma2" and is_transformers_version(">=", "4.45.0"):
+            model_kwargs["attn_implementation"] = "sdpa"
+
         ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs)
         self.assertIsInstance(ov_model.config, PretrainedConfig)
         self.assertTrue(ov_model.use_cache)
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
         tokens = tokenizer("This is a sample output", return_tensors="pt")
-        tokens.pop("token_type_ids", None)

         ov_outputs = ov_model(**tokens)
         self.assertTrue("logits" in ov_outputs)

@@ -906,7 +909,6 @@ def test_compare_to_transformers(self, model_arch):
         # Compare batched generation
         tokenizer.padding_side = "left"
         tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True)
-        tokens.pop("token_type_ids", None)
         ov_model.generation_config.eos_token_id = None
         transformers_model.generation_config.eos_token_id = None
         ov_model.config.eos_token_id = None

@@ -930,7 +932,10 @@ def test_compare_to_transformers(self, model_arch):

             additional_inputs = {"past_key_values": DynamicCache()}
         transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config, **additional_inputs)
-        self.assertTrue(torch.allclose(ov_outputs, transformers_outputs))
+        self.assertTrue(
+            torch.allclose(ov_outputs, transformers_outputs),
+            f"OV output {ov_outputs}\nTransformers output {transformers_outputs}",
+        )

         del transformers_model
         del ov_model

@@ -1095,6 +1100,11 @@ def test_beam_search(self, model_arch):
                 "config": AutoConfig.from_pretrained(model_id, trust_remote_code=True),
                 "trust_remote_code": True,
             }
+
+        # starting from transformers 4.45.0, gemma2 uses eager attention by default, while the OpenVINO export uses sdpa
+        if model_arch == "gemma2" and is_transformers_version(">=", "4.45.0"):
+            model_kwargs["attn_implementation"] = "sdpa"
+
         # the Qwen tokenizer does not support padding; the chatglm and glm4 test models produce nans that are incompatible with beam search
         if model_arch in ["qwen", "chatglm", "glm4"]:
             return

@@ -1170,7 +1180,6 @@ def test_beam_search(self, model_arch):
             from transformers.cache_utils import DynamicCache
             tokenizer.pad_token_id = tokenizer.eos_token_id
             tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True)
-            tokens.pop("token_type_ids", None)
             ov_model_stateful.generation_config.eos_token_id = None
             ov_model_stateless.generation_config.eos_token_id = None
             transformers_model.generation_config.eos_token_id = None

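The gemma2-specific kwarg added in both tests is the standard transformers switch for the attention backend; forcing SDPA keeps the reference model numerically aligned with the OpenVINO export. For illustration, where the checkpoint name is a placeholder for any gemma2 model:

    # Forcing the SDPA attention backend when loading a reference model;
    # the checkpoint name is a placeholder.
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b", attn_implementation="sdpa")
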