Skip to content

Commit

Permalink
Merge branch 'main' into ea/olmo_support
Browse files Browse the repository at this point in the history
  • Loading branch information
eaidova authored Apr 25, 2024
2 parents 02c8ec6 + 33fc7b7 commit c7a6d51
Show file tree
Hide file tree
Showing 26 changed files with 110 additions and 43 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test_inc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ jobs:
python -m pip install --upgrade pip
pip install cmake
pip install py-cpuinfo
pip install torch==2.2 torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
pip install .[neural-compressor,diffusers,tests]
pip install intel-extension-for-transformers
pip install peft
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test_ipex.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install torch==2.2 torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
pip install .[ipex,tests]
- name: Test with Pytest
run: |
Expand Down
20 changes: 10 additions & 10 deletions .github/workflows/test_openvino_examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ on:
push:
paths:
- '.github/workflows/test_openvino_examples.yml'
- 'examples/openvino/*'
- 'examples/openvino/**'
pull_request:
paths:
- '.github/workflows/test_openvino_examples.yml'
- 'examples/openvino/*'
- 'examples/openvino/**'

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
Expand All @@ -22,9 +22,9 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.10"]
python-version: ["3.8", "3.11"]

runs-on: ubuntu-20.04
runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v2
Expand All @@ -35,12 +35,12 @@ jobs:

- name: Install dependencies
run: |
pip install optimum[openvino] jstyleson nncf pytest
pip install -r examples/openvino/audio-classification/requirements.txt
pip install -r examples/openvino/image-classification/requirements.txt
pip install -r examples/openvino/question-answering/requirements.txt
pip install -r examples/openvino/text-classification/requirements.txt
pip install .[openvino] jstyleson pytest
pip install -r examples/openvino/audio-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
pip install -r examples/openvino/image-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
pip install -r examples/openvino/question-answering/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
pip install -r examples/openvino/text-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
- name: Test examples
run: |
python -m pytest examples/openvino/test_examples.py
python -m pytest examples/openvino/test_examples.py
4 changes: 2 additions & 2 deletions .github/workflows/test_openvino_notebooks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.10"]
python-version: ["3.8", "3.11"]

runs-on: ubuntu-20.04
runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v2
Expand Down
3 changes: 2 additions & 1 deletion examples/openvino/audio-classification/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
datasets>=1.14.0
evaluate
librosa
torchaudio
torchaudio
accelerate
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from transformers.utils import check_min_version, send_example_telemetry
from transformers.utils.versions import require_version

from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments


logger = logging.getLogger(__name__)
Expand Down
1 change: 1 addition & 0 deletions examples/openvino/image-classification/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ datasets >= 1.8.0
torch >= 1.9.0
torchvision>=0.6.0
evaluate
accelerate
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
from transformers.utils import check_min_version, send_example_telemetry
from transformers.utils.versions import require_version

from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments


logger = logging.getLogger(__name__)
Expand Down
1 change: 1 addition & 0 deletions examples/openvino/question-answering/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
datasets >= 1.8.0
torch >= 1.9.0
evaluate
accelerate
2 changes: 1 addition & 1 deletion examples/openvino/question-answering/run_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
from transformers.utils.versions import require_version
from utils_qa import postprocess_qa_predictions

from optimum.intel.openvino import OVConfig, OVTrainingArguments
from optimum.intel import OVConfig, OVTrainingArguments


# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
Expand Down
2 changes: 1 addition & 1 deletion examples/openvino/question-answering/trainer_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import torch.nn.functional as F
from transformers.trainer_utils import PredictionOutput

from optimum.intel.openvino.trainer import OVTrainer
from optimum.intel import OVTrainer


class QuestionAnsweringOVTrainer(OVTrainer):
Expand Down
3 changes: 2 additions & 1 deletion examples/openvino/text-classification/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ scipy
scikit-learn
protobuf
torch >= 1.3
evaluate
evaluate
accelerate
2 changes: 1 addition & 1 deletion examples/openvino/text-classification/run_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
from transformers.utils import check_min_version, send_example_telemetry
from transformers.utils.versions import require_version

from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments


# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
Expand Down
24 changes: 12 additions & 12 deletions notebooks/openvino/optimum_openvino_inference.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
}
],
"source": [
"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
"from optimum.intel import OVModelForQuestionAnswering\n",
"\n",
"# Load PyTorch model from the Hub and export to OpenVINO in the background\n",
"model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad\", export=True)\n",
Expand Down Expand Up @@ -182,7 +182,7 @@
}
],
"source": [
"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
"from optimum.intel import OVModelForQuestionAnswering\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
Expand Down Expand Up @@ -240,7 +240,7 @@
],
"source": [
"import torch\n",
"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
"from optimum.intel import OVModelForQuestionAnswering\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
Expand Down Expand Up @@ -324,7 +324,7 @@
}
],
"source": [
"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
"from optimum.intel import OVModelForQuestionAnswering\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"model = OVModelForQuestionAnswering.from_pretrained(\n",
Expand Down Expand Up @@ -529,7 +529,7 @@
],
"source": [
"from IPython.display import Audio\n",
"from optimum.intel.openvino import OVModelForAudioClassification\n",
"from optimum.intel import OVModelForAudioClassification\n",
"from transformers import AutoFeatureExtractor, pipeline\n",
"from datasets import load_dataset\n",
"\n",
Expand Down Expand Up @@ -638,7 +638,7 @@
}
],
"source": [
"from optimum.intel.openvino import OVModelForCausalLM\n",
"from optimum.intel import OVModelForCausalLM\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"model_id = \"helenai/gpt2-ov\"\n",
Expand Down Expand Up @@ -704,7 +704,7 @@
],
"source": [
"from IPython.display import Image\n",
"from optimum.intel.openvino import OVModelForImageClassification\n",
"from optimum.intel import OVModelForImageClassification\n",
"from transformers import AutoImageProcessor, pipeline\n",
"\n",
"model_id = \"helenai/microsoft-swin-tiny-patch4-window7-224-ov\"\n",
Expand Down Expand Up @@ -766,7 +766,7 @@
}
],
"source": [
"from optimum.intel.openvino import OVModelForMaskedLM\n",
"from optimum.intel import OVModelForMaskedLM\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"model_id = \"helenai/bert-base-uncased-ov\"\n",
Expand Down Expand Up @@ -835,7 +835,7 @@
}
],
"source": [
"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
"from optimum.intel import OVModelForQuestionAnswering\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"# Load the model and tokenizer saved in Part 1 of this notebook. Or use the line below to load them from the hub\n",
Expand Down Expand Up @@ -890,7 +890,7 @@
}
],
"source": [
"from optimum.intel.openvino import OVModelForSeq2SeqLM\n",
"from optimum.intel import OVModelForSeq2SeqLM\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"model_id = \"helenai/t5-small-ov\"\n",
Expand Down Expand Up @@ -998,7 +998,7 @@
}
],
"source": [
"from optimum.intel.openvino import OVModelForSequenceClassification\n",
"from optimum.intel import OVModelForSequenceClassification\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"model_id = \"helenai/papluca-xlm-roberta-base-language-detection-ov\"\n",
Expand Down Expand Up @@ -1047,7 +1047,7 @@
}
],
"source": [
"from optimum.intel.openvino import OVModelForTokenClassification\n",
"from optimum.intel import OVModelForTokenClassification\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"model_id = \"helenai/dslim-bert-base-NER-ov-fp32\"\n",
Expand Down
4 changes: 2 additions & 2 deletions notebooks/openvino/question_answering_quantization.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"import transformers\n",
"from evaluate import evaluator\n",
"from openvino.runtime import Core\n",
"from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
"from optimum.intel import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
"from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline\n",
"\n",
"transformers.logging.set_verbosity_error()\n",
Expand Down Expand Up @@ -286,7 +286,7 @@
"**NOTE:** if you notice very low accuracy after post-training quantization, it is likely caused by an overflow issue which affects processors that do not contain VNNI (Vector Neural Network Instruction). NNCF has an `overflow_fix` option to address this. It will effectively use 7-bits for quantizing instead of 8-bits to prevent the overflow. To use this option, modify the code in the next cell to add an explicit quantization configuration, and set `overflow_fix` to `\"enable\"`:\n",
"\n",
"```\n",
"from optimum.intel.openvino import OVConfig, OVQuantizationConfig\n",
"from optimum.intel import OVConfig, OVQuantizationConfig\n",
"\n",
"ov_config = OVConfig(quantization_config=OVQuantizationConfig(overflow_fix=\"enable\"))\n",
"quantizer = OVQuantizer.from_pretrained(model)\n",
Expand Down
2 changes: 1 addition & 1 deletion notebooks/openvino/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
optimum-intel[openvino, nncf]
optimum-intel[openvino]
datasets
evaluate[evaluator]
ipywidgets
Expand Down
2 changes: 1 addition & 1 deletion notebooks/openvino/stable_diffusion_optimization.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"metadata": {},
"outputs": [],
"source": [
"from optimum.intel.openvino import OVStableDiffusionPipeline\n",
"from optimum.intel import OVStableDiffusionPipeline\n",
"from diffusers.training_utils import set_seed\n",
"from IPython.display import display"
]
Expand Down
50 changes: 49 additions & 1 deletion optimum/exporters/openvino/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@
from transformers.utils import is_tf_available

from optimum.exporters.onnx.config import TextDecoderOnnxConfig, TextDecoderWithPositionIdsOnnxConfig
from optimum.exporters.onnx.model_configs import GemmaOnnxConfig, LlamaOnnxConfig
from optimum.exporters.onnx.model_configs import FalconOnnxConfig, GemmaOnnxConfig, LlamaOnnxConfig
from optimum.exporters.tasks import TasksManager
from optimum.utils import DEFAULT_DUMMY_SHAPES
from optimum.utils.input_generators import (
DummyInputGenerator,
DummyPastKeyValuesGenerator,
DummyTextInputGenerator,
FalconDummyPastKeyValuesGenerator,
MistralDummyPastKeyValuesGenerator,
)
from optimum.utils.normalized_config import NormalizedTextConfig
Expand Down Expand Up @@ -443,3 +444,50 @@ class OrionOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
class OlmoOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
DEFAULT_ONNX_OPSET = 14
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig


class OVFalconDummyPastKeyValuesGenerator(FalconDummyPastKeyValuesGenerator):
    """
    Falcon past-key-values dummy generator used for the OpenVINO export.

    The parent constructor is invoked unchanged; afterwards `num_kv_heads` and
    `head_dim` are overwritten on the instance from the normalized config.
    """

    def __init__(
        self,
        task: str,
        normalized_config: NormalizedTextConfig,
        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
        sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"],
        random_batch_size_range: Optional[Tuple[int, int]] = None,
        random_sequence_length_range: Optional[Tuple[int, int]] = None,
        **kwargs,
    ):
        """
        Args:
            task: Forwarded to the parent generator.
            normalized_config: Config read for `new_decoder_architecture`,
                `num_attention_heads`, `multi_query` and `num_kv_heads`.
            batch_size: Forwarded to the parent generator.
            sequence_length: Forwarded to the parent generator.
            random_batch_size_range: Forwarded to the parent generator.
            random_sequence_length_range: Forwarded to the parent generator.
            **kwargs: Extra keyword arguments forwarded to the parent generator.
        """
        super().__init__(
            task=task,
            normalized_config=normalized_config,
            batch_size=batch_size,
            sequence_length=sequence_length,
            random_batch_size_range=random_batch_size_range,
            random_sequence_length_range=random_sequence_length_range,
            **kwargs,
        )

        # Pick the number of key/value heads:
        #   * new decoder architecture -> same as the number of attention heads
        #   * multi-query attention    -> a single shared head
        #   * otherwise                -> the value stored in the config
        if normalized_config.new_decoder_architecture:
            kv_heads = normalized_config.num_attention_heads
        elif normalized_config.multi_query:
            kv_heads = 1
        else:
            kv_heads = normalized_config.num_kv_heads
        self.num_kv_heads = kv_heads

        # NOTE(review): `hidden_size` and `num_attention_heads` are presumably set
        # by the parent constructor - confirm against FalconDummyPastKeyValuesGenerator.
        self.head_dim = self.hidden_size // self.num_attention_heads


@register_in_tasks_manager(
    "falcon",
    "feature-extraction",
    "feature-extraction-with-past",
    "question-answering",
    "text-generation",
    "text-generation-with-past",
    "token-classification",
    library_name="transformers",
)
class FalconOpenVINOConfig(FalconOnnxConfig):
    """
    Export config registered for the "falcon" model type (transformers library).

    Reuses `FalconOnnxConfig` and swaps in `OVFalconDummyPastKeyValuesGenerator`
    both as the past-key-values generator and as the first dummy input generator.
    """

    DUMMY_PKV_GENERATOR_CLASS = OVFalconDummyPastKeyValuesGenerator
    # The OpenVINO-specific generator goes first, followed by the generators
    # declared on TextDecoderOnnxConfig.
    DUMMY_INPUT_GENERATOR_CLASSES = (
        OVFalconDummyPastKeyValuesGenerator,
    ) + TextDecoderOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES
9 changes: 9 additions & 0 deletions optimum/intel/generation/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,22 @@ def _reorder_cache(
"""
if self.config.model_type == "bloom":
return self._reorder_cache_bloom(past_key_values, beam_idx)
elif self.config.model_type == "gpt_bigcode":
return self._reorder_cache_gpt_bigcode(past_key_values, beam_idx)

# from transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel._reorder_cache
return tuple(
tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past)
for layer_past in past_key_values
)

# Copied from transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeForCausalLM._reorder_cache
@staticmethod
def _reorder_cache_gpt_bigcode(
    past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
) -> Tuple[Tuple[torch.Tensor]]:
    """
    Reorder every cached tensor along dimension 0 according to `beam_idx`.

    Each element of `past_key_values` is indexed directly as a tensor (there is
    no inner key/value tuple to unpack here).

    Args:
        past_key_values: Iterable of cached tensors, one entry per element.
        beam_idx: Index tensor selecting entries along dimension 0; it is moved
            to the device of each cached tensor before indexing.

    Returns:
        A tuple with one reordered tensor per input element.
    """
    reordered = []
    for cached in past_key_values:
        selection = beam_idx.to(cached.device)
        reordered.append(cached.index_select(0, selection))
    return tuple(reordered)

# Copied from transformers.models.bloom.modeling_bloom.BloomForCausalLM._reorder_cache
def _reorder_cache_bloom(
self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
Expand Down
4 changes: 3 additions & 1 deletion optimum/intel/openvino/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,9 @@ def __init__(
if isinstance(quantization_config, dict):
quantization_config = self._quantization_config_from_dict(quantization_config)
self.quantization_config = quantization_config
self.compression = None # A field for backward-compatability of training-time compression parameters
self.compression = kwargs.get(
"compression", None
) # A field for backward-compatability of training-time compression parameters
bits = self.quantization_config.bits if self.quantization_config else None
self.dtype = "int" + str(bits) if isinstance(bits, int) else dtype

Expand Down
Loading

0 comments on commit c7a6d51

Please sign in to comment.