
Merge branch 'openvinotoolkit:master' into mkulakow/max_length
michalkulakowski authored Jan 9, 2025
2 parents 5d527f0 + f1802f5 commit 9efa361
Showing 222 changed files with 10,556 additions and 6,357 deletions.
29 changes: 20 additions & 9 deletions .github/labeler.yml
@@ -13,17 +13,20 @@
- 'src/python/py_tokenizer.cpp'
- 'thirdparty/openvino_tokenizers'
- 'tests/python_tests/tokenizer_configs.py'
- 'tests/python_tests/test_tokenizer.py'

'category: LLM':
- 'src/cpp/include/openvino/genai/llm_pipeline.hpp'
- 'src/cpp/src/llm_pipeline.cpp'
- 'src/cpp/src/lm_encoding.hpp'
- 'src/cpp/src/lm_encoding.cpp'
- 'src/cpp/src/llm_pipeline_base.hpp'
- 'src/cpp/src/llm_pipeline_static.hpp'
- 'src/cpp/src/llm_pipeline_static.cpp'
- 'src/cpp/src/text_callback_streamer.cpp'
- 'src/cpp/src/text_callback_streamer.hpp'
- 'src/python/py_llm_pipeline.cpp'
- 'tests/python_tests/test_generate_api.py'
- 'tests/python_tests/test_chat_generate_api.py'
- 'tests/python_tests/test_llm_pipeline.py'

'category: sampling':
- 'src/cpp/include/openvino/genai/generation_config.hpp'
@@ -35,6 +38,7 @@
- 'tests/cpp/logit_filtering.cpp'
- 'tests/cpp/generate_config.cpp'
- 'tests/cpp/sampler.cpp'
- 'tests/python_tests/test_sampling.py'

'category: LoRA':
- 'src/cpp/include/openvino/genai/lora_adapter.hpp'
@@ -54,9 +58,12 @@
- 'src/cpp/include/openvino/genai/whisper_pipeline.hpp'
- 'src/cpp/src/whisper/**/*'
- 'src/cpp/src/whisper_generation_config.cpp'
- 'src/cpp/src/whisper_pipeline_base.hpp'
- 'src/cpp/src/whisper_pipeline.cpp'
- 'src/cpp/src/whisper_pipeline_static.cpp'
- 'src/cpp/src/whisper_pipeline_static.hpp'
- 'src/python/py_whisper_pipeline.cpp'
- 'tests/python_tests/test_whisper_generate_api.py'
- 'tests/python_tests/test_whisper_pipeline.py'

'category: Python API':
- 'src/python/**/*'
@@ -65,10 +72,14 @@
- 'src/include/openvino/genai/visual_language/**/*'
- 'src/cpp/src/visual_language/**/*'
- 'src/python/py_vlm_pipeline.cpp'
- 'tests/python_tests/test_vlm_api.py'
- 'tests/python_tests/test_vlm_pipeline.py'

'category: speculative decoding':
- 'src/cpp/src/speculative_decoding/**/*'
- 'tests/cpp/speculative_decoding.cpp'

'category: prompt lookup':
- 'src/cpp/src/prompt_lookup/**/*'

'category: continuous batching':
- 'src/cpp/include/openvino/genai/cache_eviction.hpp'
@@ -91,19 +102,19 @@
- 'src/cpp/src/generation_handle.cpp'
- 'src/cpp/src/generation_stream.hpp'
- 'src/cpp/src/model_runner.hpp'
- 'src/cpp/src/paged_attention_transformations.cpp'
- 'src/cpp/src/paged_attention_transformations.hpp'
- 'src/cpp/src/utils/paged_attention_transformations.cpp'
- 'src/cpp/src/utils/paged_attention_transformations.hpp'
- 'src/cpp/src/scheduler.hpp'
- 'src/cpp/src/sequence_group.cpp'
- 'src/cpp/src/sequence_group.hpp'
- 'src/cpp/src/timer.hpp'
- 'src/python/py_continuous_batching_pipeline.cpp'
- 'tests/python_tests/test_cache_optimizations.py'
- 'tests/python_tests/test_preemption.py'
- 'tests/python_tests/test_sampling.py'
- 'tests/python_tests/test_continuous_batching.py'
- 'tests/python_tests/test_kv_cache_eviction.py'
- 'tests/cpp/block_allocator.cpp'
- 'tests/cpp/block_hash_store.cpp'
- 'tests/cpp/block_manager.cpp'
- 'tests/cpp/cache_eviction.cpp'
- 'tests/cpp/cache_manager.cpp'
- 'tests/cpp/device_config.cpp'
- 'tests/cpp/scheduler.cpp'
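For context, .github/labeler.yml configures the repository's labeler action: each 'category: …' key lists path globs, and a pull request that touches a matching file receives that label, so the updated test-file entries route the renamed Python tests to the right reviewers. A minimal Python sketch of the matching idea (illustrative only; the real action uses its own minimatch-style glob engine, so `**` patterns behave differently than with fnmatch):

```python
from fnmatch import fnmatch

# A few label-to-glob entries mirroring .github/labeler.yml above.
LABELS = {
    "category: LLM": [
        "src/cpp/src/llm_pipeline.cpp",
        "tests/python_tests/test_llm_pipeline.py",
    ],
    "category: sampling": [
        "src/cpp/src/sampler.cpp",
        "tests/python_tests/test_sampling.py",
    ],
}

def labels_for(changed_files):
    """Return every label whose globs match at least one changed file."""
    return {
        label
        for label, globs in LABELS.items()
        if any(fnmatch(path, g) for path in changed_files for g in globs)
    }

print(labels_for(["tests/python_tests/test_sampling.py"]))  # {'category: sampling'}
```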
45 changes: 15 additions & 30 deletions .github/workflows/causal_lm_cpp.yml
@@ -16,10 +16,10 @@ concurrency:
cancel-in-progress: true

env:
l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/l_openvino_toolkit_ubuntu20_2025.0.0.dev20241205_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241205_x86_64.tgz
m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/m_openvino_toolkit_macos_12_6_2025.0.0.dev20241205_x86_64.tgz
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/w_openvino_toolkit_windows_2025.0.0.dev20241205_x86_64.zip
l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/l_openvino_toolkit_ubuntu20_2025.0.0.dev20241230_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241230_x86_64.tgz
m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/m_openvino_toolkit_macos_12_6_2025.0.0.dev20241230_x86_64.tgz
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/w_openvino_toolkit_windows_2025.0.0.dev20241230_x86_64.zip
jobs:
cpp-multinomial-greedy_causal_lm-ubuntu:
runs-on: ubuntu-20.04-8-cores
@@ -53,17 +53,17 @@ jobs:
wget https://huggingface.co/smangrul/tinyllama_lora_sql/resolve/main/adapter_model.safetensors?download=true -O adapter_model.safetensors
- run: >
. ./ov/setupvars.sh
&& timeout 25s ./build/samples/cpp/multinomial_causal_lm/multinomial_causal_lm ./open_llama_3b_v2/ a
&& timeout 35s ./build/samples/cpp/multinomial_causal_lm/multinomial_causal_lm ./open_llama_3b_v2/ a
env:
PYTHONPATH: "./build"
- run: >
. ./ov/setupvars.sh
&& timeout 25s ./samples/python/multinomial_causal_lm/multinomial_causal_lm.py ./open_llama_3b_v2/ b
&& timeout 35s ./samples/python/multinomial_causal_lm/multinomial_causal_lm.py ./open_llama_3b_v2/ b
env:
PYTHONPATH: "./build"
- run: >
. ./ov/setupvars.sh
&& timeout 25s ./build/samples/cpp/text_generation/greedy_causal_lm ./open_llama_3b_v2/ "return 0"
&& timeout 35s ./build/samples/cpp/text_generation/greedy_causal_lm ./open_llama_3b_v2/ "return 0"
| diff <(timeout 25s samples/python/text_generation/greedy_causal_lm.py ./open_llama_3b_v2/ "return 0") -
env:
PYTHONPATH: "./build"
@@ -491,7 +491,6 @@ jobs:
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
python -m pip install -r ./samples/requirements.txt
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat --task text-generation-with-past
- name: run and compare
run: |
source ./ov/setupvars.sh
@@ -505,36 +504,22 @@
./build/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
./build/samples/cpp/text_generation/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_greedy.txt
python ./samples/python/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.py ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_py.txt
python -c "
with open('predictions_greedy.txt', 'r') as f:
predicted_greedy = f.readline()
with open('predictions_prompt_lookup.txt', 'r') as f:
predicted_prompt_lookup = f.readline()
with open('predictions_py.txt', 'r') as f:
predicted_prompt_lookup_py = f.readline()
assert predicted_greedy == predicted_prompt_lookup
assert predicted_greedy == predicted_prompt_lookup_py
assert predicted_prompt_lookup == predicted_prompt_lookup_py
"
echo "Prompt lookup" passed
- name: run and compare (model with seq_length_axis = 1)
run: |
source ./ov/setupvars.sh
echo 'Code:```python
def add(a, b):
return a + b
```
Question: Can you please add 2 and 3
A:' > ./prompt.txt
./build/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm ./Qwen-7B-Chat/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
./build/samples/cpp/text_generation/greedy_causal_lm ./Qwen-7B-Chat/ "$(<prompt.txt)" > predictions_greedy.txt
python -c "
with open('predictions_greedy.txt', 'r') as f:
predicted_greedy = f.readline()
with open('predictions_prompt_lookup.txt', 'r') as f:
predicted_prompt_lookup = f.readline()
assert predicted_greedy == predicted_prompt_lookup
"
echo "Prompt lookup" passed
env:
PYTHONPATH: "./build/:$PYTHONPATH"
LD_LIBRARY_PATH: "./build/openvino_genai/:$LD_LIBRARY_PATH"
cpp-Phi-1_5:
runs-on: ubuntu-20.04-16-cores
defaults:
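For reference, the inline python -c checks in the diff above compare only the first line of each predictions file produced by the greedy and prompt-lookup runs. A standalone sketch of that comparison (file names taken from the workflow; this is an illustration, not part of the commit):

```python
def first_line(path: str) -> str:
    # Mirror the workflow's readline()-based check: only the first line matters.
    with open(path) as f:
        return f.readline()

greedy = first_line("predictions_greedy.txt")
prompt_lookup = first_line("predictions_prompt_lookup.txt")
prompt_lookup_py = first_line("predictions_py.txt")

# Prompt lookup decoding is expected to change speed, not output,
# so all three generations must be identical.
assert greedy == prompt_lookup
assert greedy == prompt_lookup_py
assert prompt_lookup == prompt_lookup_py
```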
202 changes: 202 additions & 0 deletions .github/workflows/genai-tools.yml
@@ -0,0 +1,202 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: GenAI tools

on:
workflow_dispatch:
pull_request:
merge_group:
push:
branches:
- master
- 'releases/**'

permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions

concurrency:
# github.ref is not unique in post-commit
group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-llm-bench-python
cancel-in-progress: true

jobs:
openvino_download:
name: Download OpenVINO
outputs:
status: ${{ steps.openvino_download.outcome }}
ov_artifact_name: ${{ steps.openvino_download.outputs.ov_artifact_name }}
ov_wheel_source: ${{ steps.openvino_download.outputs.ov_wheel_source }}
ov_version: ${{ steps.openvino_download.outputs.ov_version }}
timeout-minutes: 10
defaults:
run:
shell: bash
runs-on: aks-linux-2-cores-8gb
container:
image: 'openvinogithubactions.azurecr.io/openvino_provider:0.1.0'
volumes:
- /mount:/mount
- ${{ github.workspace }}:${{ github.workspace }}

steps:
- uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master
id: openvino_download
with:
platform: ubuntu22
commit_packages_to_provide: wheels
revision: latest_available_commit

llm_bench:
name: 'LLM bench tests'
defaults:
run:
shell: bash
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
python-version: ["3.11"]
needs: [ openvino_download ]
env:
OV_INSTALL_DIR: ${{ github.workspace }}/ov
SRC_DIR: ${{ github.workspace }}
LLM_BENCH_PYPATH: ${{ github.workspace }}/tools/llm_bench

steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}
- name: Lint with flake8
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest black
# stop the build if there are Python syntax errors or undefined names
python -m flake8 ${{ env.LLM_BENCH_PYPATH }} --config=${{ env.LLM_BENCH_PYPATH }}/setup.cfg
- name: Download OpenVINO package
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
path: ${{ env.OV_INSTALL_DIR }}
merge-multiple: true
- name: Install dependencies
run: |
python -m pip install ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
python -m pip install ${{ env.SRC_DIR }} -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
python -m pip install -r ${{ env.LLM_BENCH_PYPATH }}/requirements.txt ${{ needs.openvino_download.outputs.ov_wheel_source }}
working-directory: ${{ env.OV_INSTALL_DIR }}
- name: Test native pytorch model
run: |
git clone --depth 1 https://huggingface.co/katuni4ka/tiny-random-qwen
python ./tools/llm_bench/benchmark.py -m tiny-random-qwen -d cpu -n 1 -f pt -ic 20
rm -rf tiny-random-qwen
env:
GIT_LFS_SKIP_SMUDGE: 0
- name: Test tiny-random-baichuan2 Optimum Intel
run: |
optimum-cli export openvino --model katuni4ka/tiny-random-baichuan2 --trust-remote-code --weight-format fp16 ./ov_models/tiny-random-baichuan2/pytorch/dldt/FP16
python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-baichuan2/pytorch/dldt/FP16/ -d cpu -n 1 --optimum -ic 10
rm -rf ./ov_models/tiny-random-baichuan2
- name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov Optimum Intel
run: |
huggingface-cli download OpenVINO/LCM_Dreamshaper_v7-int8-ov --local-dir ov_models/lcm_dreamshaper_v7
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --optimum --num_steps 4
- name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov with GenAI
run: |
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --num_steps 4
- name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov with GenAI and LoRA
run: |
wget -O ./ov_models/soulcard.safetensors https://civitai.com/api/download/models/72591
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --lora ./ov_models/soulcard.safetensors --lora_alphas 0.7 --num_steps 4
rm -rf ./ov_models/lcm_dreamshaper_v7/
- name: Test TinyLlama-1.1B-Chat-v1.0 in Speculative Decoding via GenAI
run: |
optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format fp16 ov_models/TinyLlama-1.1B-Chat-v1.0/FP16
optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format int8 ov_models/TinyLlama-1.1B-Chat-v1.0/INT8
python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --assistant_confidence_threshold 0.4 -ic 20
python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --num_assistant_tokens 5 -ic 20
rm -rf ov_models/TinyLlama-1.1B-Chat-v1.0
- name: Test whisper-tiny via GenAI
run: |
GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 --branch main --single-branch https://huggingface.co/datasets/facebook/multilingual_librispeech
cd multilingual_librispeech
git lfs pull -I /data/mls_polish/train/audio/3283_1447_000.tar.gz
mkdir data/mls_polish/train/audio/3283_1447_000
tar zxvf data/mls_polish/train/audio/3283_1447_000.tar.gz -C data/mls_polish/train/audio/3283_1447_000/
cd ..
optimum-cli export openvino --trust-remote-code --model openai/whisper-tiny ./ov_models/whisper-tiny
python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media multilingual_librispeech/data/mls_polish/train/audio/3283_1447_000/3283_1447_000000.flac -d cpu -n 1 --optimum
python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media multilingual_librispeech/data/mls_polish/train/audio/3283_1447_000/3283_1447_000000.flac -d cpu -n 1
rm -rf ./ov_models/whisper-tiny
rm -rf multilingual_librispeech
- name: Test InternVL2-1B via GenAI
run: |
optimum-cli export openvino --model OpenGVLab/InternVL2-1B ./ov_models/internvl2-1B --task image-text-to-text --trust-remote-code
python ./tools/llm_bench/benchmark.py -m ./ov_models/internvl2-1B --media https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --prompt "What is unusual on this image?" -ic 20
python ./tools/llm_bench/benchmark.py -m ./ov_models/internvl2-1B --media https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --prompt "What is unusual on this image?" -ic 20 --optimum
rm -rf ./ov_models/internvl2-1B
wwb:
name: 'WWB tests'
defaults:
run:
shell: bash
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
python-version: ["3.11"]
needs: [ openvino_download ]
env:
OV_INSTALL_DIR: ${{ github.workspace }}/ov
SRC_DIR: ${{ github.workspace }}
WWB_PATH: ${{ github.workspace }}/tools/who_what_benchmark

steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}
- name: Lint with flake8
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest black
# stop the build if there are Python syntax errors or undefined names
python -m flake8 ${{ env.WWB_PATH }} --config=${{ env.WWB_PATH }}/setup.cfg
- name: Download OpenVINO package
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
path: ${{ env.OV_INSTALL_DIR }}
merge-multiple: true
- name: Install dependencies
run: |
python -m pip install ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
python -m pip install ${{ env.SRC_DIR }} -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
python -m pip install -r ${{ env.WWB_PATH }}/requirements.txt ${{ needs.openvino_download.outputs.ov_wheel_source }}
python -m pip install git+https://github.com/huggingface/optimum-intel.git@main#egg=optimum-intel
working-directory: ${{ env.OV_INSTALL_DIR }}
- name: WWB Tests
run: |
python -m pip install -v ${{ env.WWB_PATH }}
python -m pytest -v ${{ env.WWB_PATH }}/tests
Overall_Status:
name: ci/gha_overall_status_llm_bench
needs: [openvino_download, llm_bench, wwb]
if: ${{ always() }}
runs-on: ubuntu-latest
steps:
- name: Check status of all jobs
if: >-
${{
contains(needs.*.result, 'failure') ||
contains(needs.*.result, 'cancelled')
}}
run: exit 1
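The speculative decoding step above benchmarks a TinyLlama FP16 main model against an INT8 draft of the same model. A rough sketch of the underlying openvino_genai Python flow that tools like llm_bench build on (paths and prompt taken from the workflow; the draft_model/GenerationConfig names are assumptions based on the GenAI samples and may differ between releases):

```python
import openvino_genai as ov_genai

main_path = "./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16"
draft_path = "./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8"

# The draft model proposes candidate tokens; the main model verifies them.
pipe = ov_genai.LLMPipeline(
    main_path, "CPU", draft_model=ov_genai.draft_model(draft_path, "CPU")
)

config = ov_genai.GenerationConfig()
config.max_new_tokens = 20
# Either request a fixed number of draft tokens per step ...
config.num_assistant_tokens = 5
# ... or use a confidence threshold instead (the workflow exercises both, one at a time):
# config.assistant_confidence_threshold = 0.4

print(pipe.generate("Why is the Sun yellow?", config))
```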
2 changes: 1 addition & 1 deletion .github/workflows/job_vlm_sample_llava.yml
@@ -11,7 +11,7 @@ on:
type: string

env:
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241205_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241230_x86_64.tgz

jobs:
visual_language_chat_sample-ubuntu-llava: