[LLM_BENCH] relax md5 checks and allow pass cb config without use_cb (#…
eaidova authored Jan 16, 2025
1 parent 8824c6f commit 29e85cf
Showing 3 changed files with 11 additions and 31 deletions.
4 changes: 2 additions & 2 deletions tools/llm_bench/benchmark.py
@@ -147,9 +147,9 @@ def get_argprser():
     parser.add_argument("--draft_device", required=False, default=None, help="Inference device for Speculative decoding of draft model")
     parser.add_argument("--draft_cb_config", required=False, default=None,
                         help="Path to file with Continuous Batching Scheduler settings or dict for Speculative decoding of draft model")
-    parser.add_argument("--num_assistant_tokens", required=False, default=None, help="Config option num_assistant_tokens for Speculative decoding")
+    parser.add_argument("--num_assistant_tokens", required=False, default=None, help="Config option num_assistant_tokens for Speculative decoding", type=int)
     parser.add_argument("--assistant_confidence_threshold", required=False, default=None,
-                        help="Config option assistant_confidence_threshold for Speculative decoding")
+                        help="Config option assistant_confidence_threshold for Speculative decoding", type=float)
     parser.add_argument(
         '--end_token_stopping',
         action='store_true',
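Why the type= additions matter: without them, argparse stores the raw CLI string and every consumer must cast it manually. A minimal standalone sketch (not the benchmark's real parser) showing the difference:

    import argparse

    parser = argparse.ArgumentParser()
    # Without type=..., argparse keeps whatever string the user typed.
    parser.add_argument("--as_string", default=None)
    # With type=int, argparse converts "5" to 5 at parse time and rejects
    # non-numeric input with a usage error instead of failing downstream.
    parser.add_argument("--as_int", default=None, type=int)

    args = parser.parse_args(["--as_string", "5", "--as_int", "5"])
    print(type(args.as_string).__name__, type(args.as_int).__name__)  # str int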
10 changes: 7 additions & 3 deletions tools/llm_bench/llm_bench_utils/ov_utils.py
@@ -243,9 +243,13 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs):
 
     draft_model_path = kwargs.get("draft_model", '')
     cb = kwargs.get("use_cb", False)
-    if cb or draft_model_path:
+    cb_config = kwargs.get("cb_config")
+    use_streamer_metrics = False
+    if cb or cb_config is not None or draft_model_path:
         log.info("Continuous Batching mode activated")
-        ov_config["scheduler_config"] = get_scheduler_config_genai(kwargs.get("cb_config"))
+        ov_config["scheduler_config"] = get_scheduler_config_genai(cb_config)
+
+        use_streamer_metrics = not openvino_genai.get_version().startswith("2025.") or draft_model_path
 
     if draft_model_path:
         if not Path(draft_model_path).exists():
@@ -292,7 +296,7 @@ def get_tokens(self):
 
         def get_time_list(self):
             return self.token_generation_time
-    streamer = TokenStreamer(llm_pipe.get_tokenizer()) if cb or draft_model_path else None
+    streamer = TokenStreamer(llm_pipe.get_tokenizer()) if use_streamer_metrics else None
 
     return llm_pipe, tokenizer, end - start, streamer, True
 
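Two behavioral changes are folded into this file: Continuous Batching now also activates when a scheduler config is supplied without --use_cb (the second half of the commit title), and the TokenStreamer used to collect per-token latency is attached only when it is still needed. A minimal sketch of the two predicates; the rationale in the comments (that 2025.* GenAI builds report per-token metrics natively) is inferred from the check itself, not stated in the diff:

    def cb_mode_enabled(use_cb, cb_config, draft_model_path):
        # CB mode turns on for an explicit flag, a supplied scheduler
        # config, or speculative decoding with a draft model.
        return use_cb or cb_config is not None or bool(draft_model_path)

    def needs_streamer_metrics(genai_version, draft_model_path):
        # Presumably 2025.* builds expose native per-token metrics, so the
        # streamer fallback remains for older versions or draft models.
        return not genai_version.startswith("2025.") or bool(draft_model_path)

    assert cb_mode_enabled(False, {"max_num_seqs": 2}, "")      # config alone suffices
    assert needs_streamer_metrics("2024.6.0.0", "")             # old version: use streamer
    assert not needs_streamer_metrics("2025.0.0.0", "")         # new version: native metrics
    assert needs_streamer_metrics("2025.0.0.0", "draft/model")  # draft model: use streamer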
28 changes: 2 additions & 26 deletions tools/llm_bench/task/text_generation.py
@@ -181,14 +181,6 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,
             log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
                         f"is different from md5 of the {num - 1} iteration {prev_md5}")
             metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
-            if not args.get("use_cb", False):
-                if num == 1:
-                    # if the device is CPU, throw exception
-                    if args['devices'].lower().startswith('cpu') is True:
-                        assert (result_md5_list == prev_md5)
-                else:
-                    # throw exception
-                    assert (result_md5_list == prev_md5)
     else:
         metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
     if bench_hook is not None:
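For context, the deleted block escalated an md5 mismatch between iterations into a hard assert whenever CB mode was off (unconditionally from iteration 2 onward; on iteration 1 only for CPU devices). After this commit the log.warning above is the only consequence. A condensed before/after sketch; the motivation (tolerating nondeterministic or device-specific generation drift during long runs) is presumed, not stated in the diff:

    # Before: a mismatch aborted the whole benchmark run.
    #     if not args.get("use_cb", False):
    #         assert result_md5_list == prev_md5
    # After: a mismatch is logged and the run continues.
    if result_md5_list != prev_md5:
        log.warning(f"[{num}] md5 mismatch with iteration {num - 1}")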
@@ -231,10 +223,10 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
     if args.get('draft_model', ''):
         config_info = "Speculative decoding config: "
         if args.get('num_assistant_tokens', None):
-            gen_config.num_assistant_tokens = args['num_assistant_tokens']
+            gen_config.num_assistant_tokens = int(args['num_assistant_tokens'])
             config_info += f" num_assistant_tokens {gen_config.num_assistant_tokens}"
         if args.get('assistant_confidence_threshold', None):
-            gen_config.assistant_confidence_threshold = args['assistant_confidence_threshold']
+            gen_config.assistant_confidence_threshold = float(args['assistant_confidence_threshold'])
             config_info += f" assistant_confidence_threshold {gen_config.assistant_confidence_threshold}"
         log.info(config_info)
     start = time.perf_counter()
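The explicit int()/float() casts mirror the argparse type= change: when args is populated from somewhere other than the CLI parser (for example a config dict), these values can still arrive as strings, and the numeric fields of openvino_genai's GenerationConfig expect native Python numbers. A standalone sketch of the pattern; GenConfig is a hypothetical stand-in for the real GenerationConfig:

    class GenConfig:  # hypothetical stand-in for openvino_genai.GenerationConfig
        num_assistant_tokens = 0
        assistant_confidence_threshold = 0.0

    args = {"num_assistant_tokens": "5", "assistant_confidence_threshold": "0.4"}
    gen_config = GenConfig()
    # Cast defensively: the values may be strings if args bypassed argparse.
    if args.get("num_assistant_tokens"):
        gen_config.num_assistant_tokens = int(args["num_assistant_tokens"])
    if args.get("assistant_confidence_threshold"):
        gen_config.assistant_confidence_threshold = float(args["assistant_confidence_threshold"])
    print(gen_config.num_assistant_tokens, gen_config.assistant_confidence_threshold)  # 5 0.4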
@@ -339,14 +331,6 @@ def token_printer():
             log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
                         f"is different from md5 of the {num - 1} iteration {prev_md5}")
             metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
-            if not args.get("use_cb", False):
-                if num == 1:
-                    # if the device is CPU, throw exception
-                    if args['devices'].lower().startswith('cpu') is True:
-                        assert (result_md5_list == prev_md5)
-                else:
-                    # throw exception
-                    assert (result_md5_list == prev_md5)
     else:
         metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
 
@@ -461,14 +445,6 @@ def run_text_generation_genai_with_stream(input_text, num, model, tokenizer, arg
             log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
                         f"is different from md5 of the {num - 1} iteration {prev_md5}")
             metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
-            if not args.get("use_cb", False):
-                if num == 1:
-                    # if the device is CPU, throw exception
-                    if args['devices'].lower().startswith('cpu') is True:
-                        assert (result_md5_list == prev_md5)
-                else:
-                    # throw exception
-                    assert (result_md5_list == prev_md5)
     else:
         metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
     streamer.reset()
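Putting the three files together, an invocation along the following lines should now enable Continuous Batching from the scheduler config alone and feed correctly typed speculative-decoding options through to the pipeline. The -m/-d/--cb_config/--draft_model flags are assumed from the surrounding llm_bench code rather than shown in this diff, and the paths are placeholders:

    python tools/llm_bench/benchmark.py \
        -m /path/to/model -d CPU \
        --cb_config scheduler_config.json \
        --draft_model /path/to/draft_model \
        --num_assistant_tokens 5 \
        --assistant_confidence_threshold 0.4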
