
Commit d8afcdf

Merge remote-tracking branch 'upstream/master' into 2025.1.0-nightly

2 parents: 6a399c8 + a3031f3

6 files changed: +19 -36 lines

CMakeLists.txt

Lines changed: 1 addition & 1 deletion

```diff
@@ -30,7 +30,7 @@ if(UNIX AND NOT (APPLE OR ANDROID OR CYGWIN))
 endif()
 
 project(OpenVINOGenAI
-        VERSION 2025.0.0.0
+        VERSION 2025.1.0.0
         DESCRIPTION "OpenVINO GenAI"
         HOMEPAGE_URL "https://github.com/openvinotoolkit/openvino.genai"
         LANGUAGES CXX C)
```

pyproject.toml

Lines changed: 3 additions & 3 deletions

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "openvino-genai"
-version = "2025.0.0.0"
+version = "2025.1.0.0"
 description = "Library of the most popular Generative AI model pipelines, optimized execution methods, and samples"
 requires-python = ">=3.9"
 readme = { file = "src/README.md", content-type="text/markdown" }
@@ -30,7 +30,7 @@ classifiers = [
     "Programming Language :: Python :: Implementation :: CPython"
 ]
 dependencies = [
-    "openvino_tokenizers~=2025.0.0.0.dev"
+    "openvino_tokenizers~=2025.1.0.0.dev"
 ]
 
 [tool.py-build-cmake.module]
@@ -52,7 +52,7 @@ options = {"BUILD_TOKENIZERS" = "OFF"}
 [build-system]
 requires = [
     "py-build-cmake==0.3.4",
-    "openvino~=2025.0.0.0.dev",
+    "openvino~=2025.1.0.0.dev",
     "pybind11-stubgen==2.5.1",
     "cmake~=3.23.0"
 ]
```
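
A note on the pin style: `~=2025.1.0.0.dev` is a PEP 440 compatible-release specifier, so pip resolves any 2025.1.0.x build, including dev pre-releases, while excluding the 2025.0 line. A minimal sketch of that behavior, assuming the third-party `packaging` library is available:

```python
# Sketch: how the compatible-release operator in "openvino~=2025.1.0.0.dev"
# matches versions. Requires the `packaging` library (pip install packaging).
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=2025.1.0.0.dev")  # roughly >=2025.1.0.0.dev0 and ==2025.1.0.*

print(spec.contains("2025.1.0.0", prereleases=True))              # True
print(spec.contains("2025.1.0.0.dev20250101", prereleases=True))  # True
print(spec.contains("2025.0.0.0", prereleases=True))              # False
```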

tests/python_tests/test_sampling.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,15 @@ def test_stop_strings(tmp_path, generation_config):
6565
'What is OpenVINO?',
6666
'table is made of',
6767
'The Sun is yellow because',
68-
'你好! 你好嗎?'
68+
'你好! 你好嗎?',
6969
'I have an interview about product speccing with the company Weekend Health. Give me an example of a question they might ask with regards about a new feature'
7070
])
7171
@pytest.mark.parametrize("use_cb", [True, False])
7272
def test_greedy(tmp_path, generation_config, prompt, use_cb):
7373
model_id : str = "katuni4ka/tiny-random-phi3"
74+
if sys.platform.startswith('win') and prompt.startswith('你'):
75+
pytest.skip("For unknown reason this prompt fails on Win")
76+
7477
run_llm_pipeline_with_ref(model_id=model_id,
7578
prompts=[prompt],
7679
generation_config=generation_config,
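
Two fixes here: the added trailing comma makes the CJK prompt and the interview prompt separate parametrize cases (without it, the adjacent string literals were silently concatenated into one prompt), and a runtime skip avoids the Windows failure. For reference, a declarative variant of the same skip, written as a self-contained sketch rather than the repository's actual test:

```python
# Self-contained sketch: mark a single parametrized case as skipped on
# Windows via pytest.param, instead of calling pytest.skip() inside the test.
import sys
import pytest

@pytest.mark.parametrize("prompt", [
    "What is OpenVINO?",
    pytest.param(
        "你好! 你好嗎?",
        marks=pytest.mark.skipif(sys.platform.startswith("win"),
                                 reason="For unknown reason this prompt fails on Win"),
    ),
])
def test_prompt_is_nonempty(prompt):
    assert prompt  # stand-in for the real pipeline comparison
```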

tools/llm_bench/benchmark.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -147,9 +147,9 @@ def get_argprser():
     parser.add_argument("--draft_device", required=False, default=None, help="Inference device for Speculative decoding of draft model")
     parser.add_argument("--draft_cb_config", required=False, default=None,
                         help="Path to file with Continuous Batching Scheduler settings or dict for Speculative decoding of draft model")
-    parser.add_argument("--num_assistant_tokens", required=False, default=None, help="Config option num_assistant_tokens for Speculative decoding")
+    parser.add_argument("--num_assistant_tokens", required=False, default=None, help="Config option num_assistant_tokens for Speculative decoding", type=int)
     parser.add_argument("--assistant_confidence_threshold", required=False, default=None,
-                        help="Config option assistant_confidence_threshold for Speculative decoding")
+                        help="Config option assistant_confidence_threshold for Speculative decoding", type=float)
     parser.add_argument(
         '--end_token_stopping',
        action='store_true',
```
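
Adding `type=int` / `type=float` makes argparse convert the flags at parse time; without them every value arrives as a string and downstream consumers must cast it themselves. A minimal sketch (the parser here is illustrative, not the benchmark's full CLI):

```python
# Sketch: argparse performs the conversion when type= is given.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--num_assistant_tokens", default=None, type=int)
parser.add_argument("--assistant_confidence_threshold", default=None, type=float)

args = parser.parse_args(["--num_assistant_tokens", "5",
                          "--assistant_confidence_threshold", "0.4"])
print(type(args.num_assistant_tokens).__name__)            # int
print(type(args.assistant_confidence_threshold).__name__)  # float
```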

tools/llm_bench/llm_bench_utils/ov_utils.py

Lines changed: 7 additions & 3 deletions

```diff
@@ -243,9 +243,13 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs):
 
     draft_model_path = kwargs.get("draft_model", '')
     cb = kwargs.get("use_cb", False)
-    if cb or draft_model_path:
+    cb_config = kwargs.get("cb_config")
+    use_streamer_metrics = False
+    if cb or cb_config is not None or draft_model_path:
         log.info("Continuous Batching mode activated")
-        ov_config["scheduler_config"] = get_scheduler_config_genai(kwargs.get("cb_config"))
+        ov_config["scheduler_config"] = get_scheduler_config_genai(cb_config)
+
+        use_streamer_metrics = not openvino_genai.get_version().startswith("2025.") or draft_model_path
 
     if draft_model_path:
         if not Path(draft_model_path).exists():
@@ -292,7 +296,7 @@ def get_tokens(self):
 
     def get_time_list(self):
         return self.token_generation_time
-    streamer = TokenStreamer(llm_pipe.get_tokenizer()) if cb or draft_model_path else None
+    streamer = TokenStreamer(llm_pipe.get_tokenizer()) if use_streamer_metrics else None
 
     return llm_pipe, tokenizer, end - start, streamer, True
```
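
The new `use_streamer_metrics` flag decouples streamer creation from the Continuous Batching switch: per the condition above, the manual token streamer is only needed when the installed openvino_genai predates the 2025 releases or when a draft model is in play. Distilled into a standalone sketch (`needs_streamer_metrics` is an illustrative name, not a function in the repository):

```python
# Sketch of the gating condition introduced above.
def needs_streamer_metrics(genai_version: str, draft_model_path: str) -> bool:
    # Pre-2025 builds lack pipeline-reported metrics, and speculative
    # decoding (a draft model) still requires the manual token streamer.
    return not genai_version.startswith("2025.") or bool(draft_model_path)

assert needs_streamer_metrics("2024.6.0.0", "") is True
assert needs_streamer_metrics("2025.1.0.0", "") is False
assert needs_streamer_metrics("2025.1.0.0", "/path/to/draft") is True
```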

tools/llm_bench/task/text_generation.py

Lines changed: 2 additions & 26 deletions

```diff
@@ -181,14 +181,6 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,
                 log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
                             f"is different from md5 of the {num - 1} iteration {prev_md5}")
         metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
-        if not args.get("use_cb", False):
-            if num == 1:
-                # if the device is CPU, throw exception
-                if args['devices'].lower().startswith('cpu') is True:
-                    assert (result_md5_list == prev_md5)
-            else:
-                # throw exception
-                assert (result_md5_list == prev_md5)
     else:
         metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
     if bench_hook is not None:
@@ -231,10 +223,10 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
     if args.get('draft_model', ''):
         config_info = "Speculative decoding config: "
         if args.get('num_assistant_tokens', None):
-            gen_config.num_assistant_tokens = args['num_assistant_tokens']
+            gen_config.num_assistant_tokens = int(args['num_assistant_tokens'])
             config_info += f" num_assistant_tokens {gen_config.num_assistant_tokens}"
         if args.get('assistant_confidence_threshold', None):
-            gen_config.assistant_confidence_threshold = args['assistant_confidence_threshold']
+            gen_config.assistant_confidence_threshold = float(args['assistant_confidence_threshold'])
             config_info += f" assistant_confidence_threshold {gen_config.assistant_confidence_threshold}"
         log.info(config_info)
     start = time.perf_counter()
@@ -339,14 +331,6 @@ def token_printer():
                 log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
                             f"is different from md5 of the {num - 1} iteration {prev_md5}")
         metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
-        if not args.get("use_cb", False):
-            if num == 1:
-                # if the device is CPU, throw exception
-                if args['devices'].lower().startswith('cpu') is True:
-                    assert (result_md5_list == prev_md5)
-            else:
-                # throw exception
-                assert (result_md5_list == prev_md5)
     else:
         metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
 
@@ -461,14 +445,6 @@ def run_text_generation_genai_with_stream(input_text, num, model, tokenizer, arg
                 log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
                             f"is different from md5 of the {num - 1} iteration {prev_md5}")
         metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
-        if not args.get("use_cb", False):
-            if num == 1:
-                # if the device is CPU, throw exception
-                if args['devices'].lower().startswith('cpu') is True:
-                    assert (result_md5_list == prev_md5)
-            else:
-                # throw exception
-                assert (result_md5_list == prev_md5)
     else:
         metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
     streamer.reset()
```
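
With the hard asserts removed, an md5 mismatch between iterations is now reported as a warning rather than aborting the benchmark run. An illustrative distillation of the warn-only check that remains (not the benchmark's actual helper):

```python
# Sketch: hash each iteration's output and warn on drift instead of asserting.
import hashlib
import logging
from typing import Optional

log = logging.getLogger(__name__)

def check_md5(num: int, text: str, prev_md5: Optional[str]) -> str:
    digest = hashlib.md5(text.encode("utf-8")).hexdigest()
    if prev_md5 is not None and digest != prev_md5:
        log.warning(f"[{num}] md5 {digest} differs from iteration {num - 1} md5 {prev_md5}")
    return digest
```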
