Skip to content

Commit 635eb5a

Browse files
authored
Merge branch 'master' into ak/wwb_llamacpp
2 parents daf213d + 9a27715 commit 635eb5a

File tree

62 files changed

+883
-980
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+883
-980
lines changed

.github/workflows/causal_lm_cpp.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ concurrency:
1616
cancel-in-progress: true
1717

1818
env:
19-
l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu20_2025.0.0.dev20241224_x86_64.tgz
20-
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz
21-
m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/m_openvino_toolkit_macos_12_6_2025.0.0.dev20241224_x86_64.tgz
22-
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/w_openvino_toolkit_windows_2025.0.0.dev20241224_x86_64.zip
19+
l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/l_openvino_toolkit_ubuntu20_2025.0.0.dev20241230_x86_64.tgz
20+
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241230_x86_64.tgz
21+
m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/m_openvino_toolkit_macos_12_6_2025.0.0.dev20241230_x86_64.tgz
22+
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/w_openvino_toolkit_windows_2025.0.0.dev20241230_x86_64.zip
2323
jobs:
2424
cpp-multinomial-greedy_causal_lm-ubuntu:
2525
runs-on: ubuntu-20.04-8-cores

.github/workflows/job_vlm_sample_llava.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ on:
1111
type: string
1212

1313
env:
14-
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz
14+
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241230_x86_64.tgz
1515

1616
jobs:
1717
visual_language_chat_sample-ubuntu-llava:

.github/workflows/lcm_dreamshaper_cpp.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ concurrency:
1818

1919
env:
2020
PYTHON_VERSION: '3.9'
21-
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz
22-
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/w_openvino_toolkit_windows_2025.0.0.dev20241224_x86_64.zip
21+
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241230_x86_64.tgz
22+
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17726-9ab2c1a18e7/w_openvino_toolkit_windows_2025.0.0.dev20241230_x86_64.zip
2323
OV_INSTALL_DIR: ${{ github.workspace }}/ov
2424

2525
jobs:

.github/workflows/linux.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,10 @@ jobs:
109109
merge-multiple: true
110110

111111
- name: CMake Build
112-
run: |
112+
run: |
113113
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
114114
cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ${{ env.SRC_DIR}} -B ${{ env.BUILD_DIR }}
115-
cmake --build ${{ env.BUILD_DIR}} --config ${{ matrix.build-type }} --parallel $(nproc)
115+
cmake --build ${{ env.BUILD_DIR}} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose
116116
cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_DIR }}
117117
118118
- name: Pack Artifacts

.github/workflows/mac.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: macOS (12, Python 3.9)
1+
name: macOS (12, Python 3.10)
22
on:
33
workflow_dispatch:
44
pull_request:
@@ -16,7 +16,7 @@ concurrency:
1616
cancel-in-progress: true
1717

1818
env:
19-
PYTHON_VERSION: '3.9'
19+
PYTHON_VERSION: '3.10'
2020
OV_BRANCH: master
2121
OV_TARBALL: ''
2222

@@ -219,7 +219,7 @@ jobs:
219219
run: |
220220
source ${OV_INSTALL_DIR}/setupvars.sh
221221
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
222-
cmake --build ./build/ --config Release -j
222+
cmake --build ./build/ --config Release --parallel --verbose
223223
224224
- name: Test bindings
225225
run: |
@@ -284,7 +284,7 @@ jobs:
284284
run: |
285285
source ${OV_INSTALL_DIR}/setupvars.sh
286286
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
287-
cmake --build ./build/ --config Release --target py_openvino_genai -j
287+
cmake --build ./build/ --config Release --target py_openvino_genai --parallel --verbose
288288
289289
- name: Test bindings
290290
run: |
@@ -350,7 +350,7 @@ jobs:
350350
run: |
351351
source ${OV_INSTALL_DIR}/setupvars.sh
352352
cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ./ -B ./build/
353-
cmake --build ./build/ --config ${{ matrix.build-type }} --target package -j
353+
cmake --build ./build/ --config ${{ matrix.build-type }} --target package --parallel --verbose
354354
355355
- name: Build and Install dependencies
356356
run: |

.github/workflows/stable_diffusion_1_5_cpp.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ jobs:
122122
source openvino_sd_cpp/bin/activate
123123
optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --weight-format fp16 --task stable-diffusion models/dreamlike-art-dreamlike-anime-1.0/FP16
124124
wget -O ./models/soulcard.safetensors https://civitai.com/api/download/models/72591
125+
env:
126+
HF_HUB_ENABLE_HF_TRANSFER: 1
125127

126128
- name: Run text2image app
127129
run: |
@@ -198,6 +200,8 @@ jobs:
198200
. "./openvino_sd_cpp/Scripts/Activate.ps1"
199201
optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 models/dreamlike-art-dreamlike-anime-1.0/FP16
200202
Invoke-WebRequest -Uri 'https://civitai.com/api/download/models/72591' -OutFile 'models/soulcard.safetensors'
203+
env:
204+
HF_HUB_ENABLE_HF_TRANSFER: 1
201205

202206
- name: Run text2image app
203207
run: |

.github/workflows/windows.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ jobs:
230230
run: |
231231
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
232232
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
233-
cmake --build ./build/ --config Release -j
233+
cmake --build ./build/ --config Release --parallel --verbose
234234
235235
- name: Test bindings
236236
run: |
@@ -295,7 +295,7 @@ jobs:
295295
run: |
296296
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
297297
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
298-
cmake --build ./build/ --config Release --target py_openvino_genai -j
298+
cmake --build ./build/ --config Release --target py_openvino_genai --parallel --verbose
299299
300300
- name: Test bindings
301301
run: |
@@ -360,7 +360,7 @@ jobs:
360360
run: |
361361
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
362362
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
363-
cmake --build ./build/ --config Release --target py_openvino_genai -j
363+
cmake --build ./build/ --config Release --target py_openvino_genai --parallel --verbose
364364
365365
- name: Test bindings
366366
run: |

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ if(MSVC AND MSVC_VERSION GREATER_EQUAL 1930 AND MSVC_VERSION LESS 1941)
8585
add_compile_definitions(_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR)
8686
endif()
8787

88-
8988
add_subdirectory(thirdparty)
9089
add_subdirectory(src)
9190
if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples")

samples/deployment-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
33
openvino_genai~=2025.0.0.0.dev
44
librosa==0.10.2.post1 # For Whisper
5-
pillow==11.0.0 # Image processing for VLMs
5+
pillow==11.1.0 # Image processing for VLMs

samples/export-requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ diffusers==0.32.1 # For image generation pipelines
1010
timm==1.0.12 # For exporting InternVL2
1111
torchvision # For visual language models
1212
transformers>=4.43 # For Whisper
13+
hf_transfer # For faster model downloads; should be used with env var HF_HUB_ENABLE_HF_TRANSFER=1

src/cpp/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,13 @@ ov_genai_build_jinja2cpp()
5959
file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c")
6060

6161
set(TARGET_NAME openvino_genai)
62+
6263
add_library(${TARGET_NAME} SHARED ${SOURCE_FILES})
64+
add_library(openvino::genai ALIAS ${TARGET_NAME})
65+
6366
if(TARGET openvino_tokenizers)
6467
add_dependencies(${TARGET_NAME} openvino_tokenizers)
6568
endif()
66-
add_library(openvino::genai ALIAS ${TARGET_NAME})
6769

6870
target_include_directories(${TARGET_NAME}
6971
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>" "$<INSTALL_INTERFACE:runtime/include>"
@@ -81,6 +83,7 @@ set_target_properties(${TARGET_NAME} PROPERTIES
8183
LIBRARY_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
8284
RUNTIME_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
8385
)
86+
8487
# Extract the last two digits of OpenVINOGenAI_VERSION_MAJOR because SOVERSION can only contain up to 4 symbols.
8588
string(REGEX MATCH [=[[0-9][0-9]$]=] MAJOR_SUFFIX ${OpenVINOGenAI_VERSION_MAJOR})
8689
if(DEFINED PY_BUILD_CMAKE_PACKAGE_NAME AND LINUX)

src/cpp/src/continuous_batching_adapter.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class ContinuousBatchingAdapter final : public LLMPipelineImplBase {
3333
const std::string& device,
3434
const ov::AnyMap& plugin_config
3535
): LLMPipelineImplBase{tokenizer, GenerationConfig()}, m_impl{
36-
models_path.string(),
36+
models_path,
3737
tokenizer,
3838
scheduler_config,
3939
device,
@@ -64,7 +64,7 @@ class ContinuousBatchingAdapter final : public LLMPipelineImplBase {
6464
const std::string& device,
6565
const ov::AnyMap& plugin_config
6666
): LLMPipelineImplBase{Tokenizer(models_path), GenerationConfig()}, m_impl{
67-
models_path.string(),
67+
models_path,
6868
m_tokenizer,
6969
scheduler_config,
7070
device,

src/cpp/src/continuous_batching_impl.cpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,14 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl(
2323
m_generation_config = generation_config;
2424
m_is_validation_mode_enabled = is_validation_mode_enabled;
2525

26-
ov::Core core;
27-
28-
auto [core_properties, compile_properties] = utils::split_core_compile_config(properties);
29-
core.set_property(core_properties);
30-
31-
DeviceConfig device_config(core, scheduler_config, device, compile_properties);
26+
ov::Core core = utils::singleton_core();
27+
DeviceConfig device_config(core, scheduler_config, device, properties);
3228

3329
bool is_need_per_layer_cache_control = scheduler_config.use_cache_eviction;
3430
utils::apply_paged_attention_transformations(model, device_config, is_need_per_layer_cache_control);
31+
utils::apply_gather_before_matmul_transformation(model);
3532

36-
initialize_pipeline(model, scheduler_config, compile_properties, device_config, core);
33+
initialize_pipeline(model, scheduler_config, properties, device_config, core);
3734
}
3835

3936
void ContinuousBatchingPipeline::ContinuousBatchingImpl::_pull_awaiting_requests() {
@@ -199,7 +196,7 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::step() {
199196
step_count++;
200197
#endif
201198

202-
// process generation_config.echo parameetr
199+
// process generation_config.echo parameter
203200
_fill_prompt_log_probs(m_requests, logits);
204201

205202
SamplerOutput sampler_output;
@@ -448,7 +445,7 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::_fill_prompt_log_probs(
448445
const float * logits_data = logits.data<float>();
449446
ov::Shape logits_shape = logits.get_shape();
450447
OPENVINO_ASSERT(logits_shape.size() == 3);
451-
size_t batch_seq_len = logits_shape[1], vocab_size = logits_shape[2];
448+
size_t vocab_size = logits_shape[2];
452449
for (size_t sequence_group_id = 0, currently_processed_tokens = 0; sequence_group_id < sequence_groups.size(); ++sequence_group_id) {
453450
SequenceGroup::Ptr sequence_group = sequence_groups[sequence_group_id];
454451
// requests not scheduled, in decoding phase or not echoing are not processed
@@ -458,26 +455,25 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::_fill_prompt_log_probs(
458455

459456
size_t num_running_sequences = sequence_group->num_running_seqs();
460457
OPENVINO_ASSERT(num_running_sequences == 1);
461-
size_t actual_seq_len = sequence_group->get_num_scheduled_tokens();
462-
size_t padded_amount_of_processed_tokens = std::max(actual_seq_len, batch_seq_len);
458+
size_t output_seq_len = sequence_group->get_output_seq_len();
463459

464460
const float * sequence_group_logits_data = logits_data + vocab_size * currently_processed_tokens;
465461

466462
size_t num_prompt_tokens_processed = sequence_group->get_num_processed_tokens();
467-
OPENVINO_ASSERT(num_prompt_tokens_processed + actual_seq_len <= sequence_group->get_prompt_len());
463+
OPENVINO_ASSERT(num_prompt_tokens_processed + output_seq_len <= sequence_group->get_prompt_len());
468464

469465
// if we processed the whole prompt we don't include last logprob as it will be processed by the sampler (it's already completion)
470466
// otherwise we include it as it will be used in the next part of the prompt
471467
int exclude_last_logprob = 1;
472-
if (num_prompt_tokens_processed + actual_seq_len < sequence_group->get_prompt_len())
468+
if (num_prompt_tokens_processed + output_seq_len < sequence_group->get_prompt_len())
473469
exclude_last_logprob = 0;
474470

475471
// if we start processing the prompt we add "fake" log prob for the first position (begin of sequence)
476472
if (num_prompt_tokens_processed == 0)
477473
sequence_group->append_prompt_log_prob(1.0);
478474

479475
for (int token_logits_offset = 0, token_id_offset = num_prompt_tokens_processed + 1;
480-
token_logits_offset < actual_seq_len - exclude_last_logprob;
476+
token_logits_offset < output_seq_len - exclude_last_logprob;
481477
token_logits_offset++, token_id_offset++) {
482478

483479
const float* token_logits = (sequence_group_logits_data + token_logits_offset * vocab_size);
@@ -502,7 +498,7 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::_fill_prompt_log_probs(
502498

503499
sequence_group->append_prompt_log_prob(token_logit - max_value - log_sum);
504500
}
505-
currently_processed_tokens += padded_amount_of_processed_tokens * num_running_sequences;
501+
currently_processed_tokens += output_seq_len * num_running_sequences;
506502
// For max_new_tokens == 0, we don't reach sampling so need to notify handle separately
507503
if(sequence_group->get_sampling_parameters().max_new_tokens == 0) {
508504
sequence_group->notify_handle_echo_only();

src/cpp/src/continuous_batching_pipeline.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p
4848
auto draft_model_desr = extract_draft_model_from_config(properties_without_draft_model);
4949
auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model);
5050

51-
std::filesystem::path openvino_model_name = "openvino_model.xml";
52-
auto model = utils::singleton_core().read_model((models_path / openvino_model_name).string());
51+
auto model = utils::singleton_core().read_model(models_path / "openvino_model.xml", {}, properties);
5352
auto tokenizer = ov::genai::Tokenizer(models_path, tokenizer_properties);
5453
auto generation_config = utils::from_config_json_if_exists(models_path);
5554

@@ -74,7 +73,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline(
7473
auto draft_model_desr = extract_draft_model_from_config(properties_without_draft_model);
7574
auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model);
7675
std::filesystem::path openvino_model_name = "openvino_model.xml";
77-
auto model = utils::singleton_core().read_model((models_path / openvino_model_name).string());
76+
auto model = utils::singleton_core().read_model(models_path / openvino_model_name, {}, properties_without_draft_model);
7877
auto generation_config = utils::from_config_json_if_exists(models_path);
7978

8079
if (is_prompt_lookup_enabled) {

src/cpp/src/generation_config.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -230,9 +230,9 @@ void GenerationConfig::validate() const {
230230
OPENVINO_ASSERT(temperature > 0, "When 'do_sample' is true, temperature must be a strictly positive float, but got ", temperature);
231231
} else {
232232
// parameters requiring multinomial
233-
OPENVINO_ASSERT(top_k == std::numeric_limits<size_t>::max(), "When 'do_sample' is false, top_k must be max of size_t, but got ", top_k);
234-
OPENVINO_ASSERT(top_p == 1.0f, "When 'do_sample' is false, top_p must be 1.0f, but got ", top_p);
235-
OPENVINO_ASSERT(temperature == 1.0f, "When 'do_sample' is false, temperature must be a 1.0f, but got ", temperature);
233+
// OPENVINO_ASSERT(top_k == std::numeric_limits<size_t>::max(), "When 'do_sample' is false, top_k must be max of size_t, but got ", top_k);
234+
// OPENVINO_ASSERT(top_p == 1.0f, "When 'do_sample' is false, top_p must be 1.0f, but got ", top_p);
235+
// OPENVINO_ASSERT(temperature == 1.0f, "When 'do_sample' is false, temperature must be a 1.0f, but got ", temperature);
236236
}
237237

238238
if (is_beam_search()) {
@@ -252,10 +252,10 @@ void GenerationConfig::validate() const {
252252
}
253253
} else {
254254
// parameters requiring beam search
255-
OPENVINO_ASSERT(num_beam_groups == 1, "'num_beam_groups' is supported by beam search only and should be 1 otherwise, but got ", num_beam_groups);
256-
OPENVINO_ASSERT(no_repeat_ngram_size == std::numeric_limits<size_t>::max(), "'no_repeat_ngram_size' is supported only by beam search, otherwise should be set to max of size_t, but got ", no_repeat_ngram_size);
257-
OPENVINO_ASSERT(diversity_penalty == 0.0f, "'diversity_penalty' is set to ", diversity_penalty, " (default is 0.0f), which is supported only by beam search sampling");
258-
OPENVINO_ASSERT(length_penalty == 1.0f, "'length_penalty' is set to ", length_penalty, " (default is 1.0f), which is supported only by beam search sampling");
255+
// OPENVINO_ASSERT(num_beam_groups == 1, "'num_beam_groups' is supported by beam search only and should be 1 otherwise, but got ", num_beam_groups);
256+
// OPENVINO_ASSERT(no_repeat_ngram_size == std::numeric_limits<size_t>::max(), "'no_repeat_ngram_size' is supported only by beam search, otherwise should be set to max of size_t, but got ", no_repeat_ngram_size);
257+
// OPENVINO_ASSERT(diversity_penalty == 0.0f, "'diversity_penalty' is set to ", diversity_penalty, " (default is 0.0f), which is supported only by beam search sampling");
258+
// OPENVINO_ASSERT(length_penalty == 1.0f, "'length_penalty' is set to ", length_penalty, " (default is 1.0f), which is supported only by beam search sampling");
259259
}
260260

261261
// assistant generation

0 commit comments

Comments
 (0)