Skip to content

Commit

Permalink
Merge branch 'master' into xufang/speculative_decoding_profile
Browse files Browse the repository at this point in the history
  • Loading branch information
xufang-lisa committed Jan 3, 2025
2 parents ffdad03 + 482fa79 commit f0f70ab
Show file tree
Hide file tree
Showing 102 changed files with 2,872 additions and 2,118 deletions.
29 changes: 20 additions & 9 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@
- 'src/python/py_tokenizer.cpp'
- 'thirdparty/openvino_tokenizers'
- 'tests/python_tests/tokenizer_configs.py'
- 'tests/python_tests/test_tokenizer.py'

'category: LLM':
- 'src/cpp/include/openvino/genai/llm_pipeline.hpp'
- 'src/cpp/src/llm_pipeline.cpp'
- 'src/cpp/src/lm_encoding.hpp'
- 'src/cpp/src/lm_encoding.cpp'
- 'src/cpp/src/llm_pipeline_base.hpp'
- 'src/cpp/src/llm_pipeline_static.hpp'
- 'src/cpp/src/llm_pipeline_static.cpp'
- 'src/cpp/src/text_callback_streamer.cpp'
- 'src/cpp/src/text_callback_streamer.hpp'
- 'src/python/py_llm_pipeline.cpp'
- 'tests/python_tests/test_generate_api.py'
- 'tests/python_tests/test_chat_generate_api.py'
- 'tests/python_tests/test_llm_pipeline.py'

'category: sampling':
- 'src/cpp/include/openvino/genai/generation_config.hpp'
Expand All @@ -35,6 +38,7 @@
- 'tests/cpp/logit_filtering.cpp'
- 'tests/cpp/generate_config.cpp'
- 'tests/cpp/sampler.cpp'
- 'tests/python_tests/test_sampling.py'

'category: LoRA':
- 'src/cpp/include/openvino/genai/lora_adapter.hpp'
Expand All @@ -54,9 +58,12 @@
- 'src/cpp/include/openvino/genai/whisper_pipeline.hpp'
- 'src/cpp/src/whisper/**/*'
- 'src/cpp/src/whisper_generation_config.cpp'
- 'src/cpp/src/whisper_pipeline_base.hpp'
- 'src/cpp/src/whisper_pipeline.cpp'
- 'src/cpp/src/whisper_pipeline_static.cpp'
- 'src/cpp/src/whisper_pipeline_static.hpp'
- 'src/python/py_whisper_pipeline.cpp'
- 'tests/python_tests/test_whisper_generate_api.py'
- 'tests/python_tests/test_whisper_pipeline.py'

'category: Python API':
- 'src/python/**/*'
Expand All @@ -65,10 +72,14 @@
- 'src/include/openvino/genai/visual_language/**/*'
- 'src/cpp/src/visual_language/**/*'
- 'src/python/py_vlm_pipeline.cpp'
- 'tests/python_tests/test_vlm_api.py'
- 'tests/python_tests/test_vlm_pipeline.py'

'category: speculative decoding':
- 'src/cpp/src/speculative_decoding/**/*'
- 'tests/cpp/speculative_decoding.cpp'

'category: prompt lookup':
- 'src/cpp/src/prompt_lookup/**/*'

'category: continuous batching':
- 'src/cpp/include/openvino/genai/cache_eviction.hpp'
Expand All @@ -91,19 +102,19 @@
- 'src/cpp/src/generation_handle.cpp'
- 'src/cpp/src/generation_stream.hpp'
- 'src/cpp/src/model_runner.hpp'
- 'src/cpp/src/paged_attention_transformations.cpp'
- 'src/cpp/src/paged_attention_transformations.hpp'
- 'src/cpp/src/utils/paged_attention_transformations.cpp'
- 'src/cpp/src/utils/paged_attention_transformations.hpp'
- 'src/cpp/src/scheduler.hpp'
- 'src/cpp/src/sequence_group.cpp'
- 'src/cpp/src/sequence_group.hpp'
- 'src/cpp/src/timer.hpp'
- 'src/python/py_continuous_batching_pipeline.cpp'
- 'tests/python_tests/test_cache_optimizations.py'
- 'tests/python_tests/test_preemption.py'
- 'tests/python_tests/test_sampling.py'
- 'tests/python_tests/test_continuous_batching.py'
- 'tests/python_tests/test_kv_cache_eviction.py'
- 'tests/cpp/block_allocator.cpp'
- 'tests/cpp/block_hash_store.cpp'
- 'tests/cpp/block_manager.cpp'
- 'tests/cpp/cache_eviction.cpp'
- 'tests/cpp/cache_manager.cpp'
- 'tests/cpp/device_config.cpp'
- 'tests/cpp/scheduler.cpp'
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/causal_lm_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ concurrency:
cancel-in-progress: true

env:
l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu20_2025.0.0.dev20241224_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz
m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/m_openvino_toolkit_macos_12_6_2025.0.0.dev20241224_x86_64.tgz
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/w_openvino_toolkit_windows_2025.0.0.dev20241224_x86_64.zip
l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/l_openvino_toolkit_ubuntu20_2025.0.0.dev20241230_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241230_x86_64.tgz
m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/m_openvino_toolkit_macos_12_6_2025.0.0.dev20241230_x86_64.tgz
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/w_openvino_toolkit_windows_2025.0.0.dev20241230_x86_64.zip
jobs:
cpp-multinomial-greedy_causal_lm-ubuntu:
runs-on: ubuntu-20.04-8-cores
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/job_vlm_sample_llava.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ on:
type: string

env:
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241230_x86_64.tgz

jobs:
visual_language_chat_sample-ubuntu-llava:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/lcm_dreamshaper_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ concurrency:

env:
PYTHON_VERSION: '3.9'
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/w_openvino_toolkit_windows_2025.0.0.dev20241224_x86_64.zip
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241230_x86_64.tgz
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/w_openvino_toolkit_windows_2025.0.0.dev20241230_x86_64.zip
OV_INSTALL_DIR: ${{ github.workspace }}/ov

jobs:
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,10 @@ jobs:
merge-multiple: true

- name: CMake Build
run: |
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ${{ env.SRC_DIR}} -B ${{ env.BUILD_DIR }}
cmake --build ${{ env.BUILD_DIR}} --config ${{ matrix.build-type }} --parallel $(nproc)
cmake --build ${{ env.BUILD_DIR}} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose
cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_DIR }}
- name: Pack Artifacts
Expand Down Expand Up @@ -268,9 +268,9 @@ jobs:
matrix:
test:
- name: 'Whisper'
cmd: 'tests/python_tests/test_whisper_generate_api.py'
cmd: 'tests/python_tests/test_whisper_pipeline.py'
- name: 'LLM & VLM'
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_generate_api.py'
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py'
defaults:
run:
shell: bash
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/mac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ jobs:
if: |
always() &&
(needs.openvino_download.outputs.status == 'success' || needs.openvino_build.result == 'success')
timeout-minutes: 90
timeout-minutes: 120
defaults:
run:
shell: bash
Expand Down Expand Up @@ -219,7 +219,7 @@ jobs:
run: |
source ${OV_INSTALL_DIR}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release -j
cmake --build ./build/ --config Release --parallel --verbose
- name: Test bindings
run: |
Expand All @@ -235,7 +235,7 @@ jobs:
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down Expand Up @@ -284,13 +284,13 @@ jobs:
run: |
source ${OV_INSTALL_DIR}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --target py_openvino_genai -j
cmake --build ./build/ --config Release --target py_openvino_genai --parallel --verbose
- name: Test bindings
run: |
source ${OV_INSTALL_DIR}/setupvars.sh
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k test_smoke
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k test_smoke
env:
PYTHONPATH: "./build/:$PYTHONPATH"

Expand All @@ -300,7 +300,7 @@ jobs:
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
genai_package:
name: OpenVINO genai extension (install to OpenVINO package)
Expand Down Expand Up @@ -350,7 +350,7 @@ jobs:
run: |
source ${OV_INSTALL_DIR}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ./ -B ./build/
cmake --build ./build/ --config ${{ matrix.build-type }} --target package -j
cmake --build ./build/ --config ${{ matrix.build-type }} --target package --parallel --verbose
- name: Build and Install dependencies
run: |
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/stable_diffusion_1_5_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ jobs:
source openvino_sd_cpp/bin/activate
optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --weight-format fp16 --task stable-diffusion models/dreamlike-art-dreamlike-anime-1.0/FP16
wget -O ./models/soulcard.safetensors https://civitai.com/api/download/models/72591
env:
HF_HUB_ENABLE_HF_TRANSFER: 1

- name: Run text2image app
run: |
Expand Down Expand Up @@ -198,6 +200,8 @@ jobs:
. "./openvino_sd_cpp/Scripts/Activate.ps1"
optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 models/dreamlike-art-dreamlike-anime-1.0/FP16
Invoke-WebRequest -Uri 'https://civitai.com/api/download/models/72591' -OutFile 'models/soulcard.safetensors'
env:
HF_HUB_ENABLE_HF_TRANSFER: 1

- name: Run text2image app
run: |
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release -j
cmake --build ./build/ --config Release --parallel --verbose
- name: Test bindings
run: |
Expand All @@ -245,7 +245,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down Expand Up @@ -295,13 +295,13 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --target py_openvino_genai -j
cmake --build ./build/ --config Release --target py_openvino_genai --parallel --verbose
- name: Test bindings
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k test_smoke
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k test_smoke
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

Expand All @@ -310,7 +310,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
genai_python_lib_vlm:
name: OpenVINO genai VLM tests (cmake + wheel)
Expand Down Expand Up @@ -360,13 +360,13 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --target py_openvino_genai -j
cmake --build ./build/ --config Release --target py_openvino_genai --parallel --verbose
- name: Test bindings
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_vlm_api.py
python -m pytest -v ./tests/python_tests/test_vlm_pipeline.py
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

Expand Down
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ if(MSVC AND MSVC_VERSION GREATER_EQUAL 1930 AND MSVC_VERSION LESS 1941)
add_compile_definitions(_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR)
endif()


add_subdirectory(thirdparty)
add_subdirectory(src)
if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ options = {"BUILD_TOKENIZERS" = "OFF"}

[build-system]
requires = [
"py-build-cmake==0.3.3",
"py-build-cmake==0.3.4",
"openvino~=2025.0.0.0.dev",
"pybind11-stubgen==2.5.1",
"cmake~=3.23.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ int main(int argc, char* argv[]) try {
config.max_new_tokens = 20;
config.num_beam_groups = 3;
config.num_beams = 15;
config.diversity_penalty = 1.0f;
config.num_return_sequences = config.num_beams;

// Since the streamer is set, the results will
Expand Down
3 changes: 2 additions & 1 deletion samples/export-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
numpy<2.0.0; sys_platform == 'darwin'
einops==0.8.0 # For Qwen
transformers_stream_generator==0.0.5 # For Qwen
diffusers==0.31.0 # For image generation pipelines
diffusers==0.32.1 # For image generation pipelines
timm==1.0.12 # For exporting InternVL2
torchvision # For visual language models
transformers>=4.43 # For Whisper
hf_transfer # for faster model downloads; should be used with env var HF_HUB_ENABLE_HF_TRANSFER=1
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def main():
config.max_new_tokens = 20
config.num_beam_groups = 3
config.num_beams = 15
config.diversity_penalty = 1
config.num_return_sequences = config.num_beams

beams = pipe.generate(args.prompts, config)
Expand Down
5 changes: 4 additions & 1 deletion src/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,13 @@ ov_genai_build_jinja2cpp()
file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c")

set(TARGET_NAME openvino_genai)

add_library(${TARGET_NAME} SHARED ${SOURCE_FILES})
add_library(openvino::genai ALIAS ${TARGET_NAME})

if(TARGET openvino_tokenizers)
add_dependencies(${TARGET_NAME} openvino_tokenizers)
endif()
add_library(openvino::genai ALIAS ${TARGET_NAME})

target_include_directories(${TARGET_NAME}
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>" "$<INSTALL_INTERFACE:runtime/include>"
Expand All @@ -81,6 +83,7 @@ set_target_properties(${TARGET_NAME} PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
RUNTIME_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
)

# Extract the last two digits from OpenVINOGenAI_VERSION_MAJOR because SOVERSION can only contain up to 4 symbols.
string(REGEX MATCH [=[[0-9][0-9]$]=] MAJOR_SUFFIX ${OpenVINOGenAI_VERSION_MAJOR})
if(DEFINED PY_BUILD_CMAKE_PACKAGE_NAME AND LINUX)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ struct PipelineMetrics {

class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
protected:
class ImplInterface;
class IContinuousBatchingPipeline;
class ContinuousBatchingImpl;

class ContinuousBatchingForSpeculativeDecodingImpl;
class ContinuousBatchingForPromptLookupImpl;
class SpeculativeDecodingImpl;
Expand All @@ -64,7 +65,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
friend class SpeculativeDecodingImpl;
friend class PromptLookupImpl;

std::shared_ptr<ImplInterface> m_impl;
std::shared_ptr<IContinuousBatchingPipeline> m_impl;

ContinuousBatchingPipeline() = default;

Expand Down
Loading

0 comments on commit f0f70ab

Please sign in to comment.