From 1ee453573cdcc2bd5bbb2e1ad63e2b60ba56a7ac Mon Sep 17 00:00:00 2001
From: eaidova
Date: Tue, 30 Apr 2024 10:37:06 +0400
Subject: [PATCH] add test

---
 optimum/intel/openvino/modeling_decoder.py | 28 ++++----
 tests/openvino/test_modeling.py            | 81 ++++++++++++++++++++++
 2 files changed, 95 insertions(+), 14 deletions(-)

diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 7c8dcd0995..20eb3ec3a5 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -1407,22 +1407,22 @@ def _group_beam_search(
             beam_next_tokens = beam_outputs["next_beam_tokens"]
             beam_idx = beam_outputs["next_beam_indices"]

-        if return_dict_in_generate and output_scores:
-            beam_indices[beam_group_idx] = tuple(
-                beam_indices[beam_group_idx][beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices[0]))
-            )
+            if return_dict_in_generate and output_scores:
+                beam_indices[beam_group_idx] = tuple(
+                    beam_indices[beam_group_idx][beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices[0]))
+                )

-        input_ids[batch_group_indices] = group_input_ids[beam_idx]
-        group_input_ids = torch.cat([group_input_ids[beam_idx, :], beam_next_tokens.unsqueeze(-1)], dim=-1)
-        current_tokens[batch_group_indices] = group_input_ids[:, -1]
+            input_ids[batch_group_indices] = group_input_ids[beam_idx]
+            group_input_ids = torch.cat([group_input_ids[beam_idx, :], beam_next_tokens.unsqueeze(-1)], dim=-1)
+            current_tokens[batch_group_indices] = group_input_ids[:, -1]

-        # (beam_idx // group_size) -> batch_idx
-        # (beam_idx % group_size) -> offset of idx inside the group
-        reordering_indices[batch_group_indices] = (
-            num_beams * torch.div(beam_idx, group_size, rounding_mode="floor")
-            + group_start_idx
-            + (beam_idx % group_size)
-        )
+            # (beam_idx // group_size) -> batch_idx
+            # (beam_idx % group_size) -> offset of idx inside the group
+            reordering_indices[batch_group_indices] = (
+                num_beams * torch.div(beam_idx, group_size, rounding_mode="floor")
+                + group_start_idx
+                + (beam_idx % group_size)
+            )

         # Store scores, attentions and hidden_states when required
         if return_dict_in_generate:
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 4ead8b37c2..b0ec810f31 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -757,6 +757,87 @@ def test_default_filling_attention_mask_and_position_ids(self):
         del model_with_cache
         gc.collect()

+    def test_beam_search(self):
+        model_id = MODEL_NAMES["llama"]
+        ov_model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True)
+        ov_model_stateless = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=False)
+        transformers_model = AutoModelForCausalLM.from_pretrained(model_id)
+
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer.pad_token = tokenizer.eos_token
+        tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True)
+        ov_model_stateful.generation_config.eos_token_id = None
+        ov_model_stateless.generation_config.eos_token_id = None
+        transformers_model.generation_config.eos_token_id = None
+        ov_model_stateful.config.eos_token_id = None
+        ov_model_stateless.config.eos_token_id = None
+        transformers_model.config.eos_token_id = None
+
+        # beam search
+        gen_config = GenerationConfig(
+            max_new_tokens=10,
+            min_new_tokens=10,
+            num_beams=4,
+            do_sample=False,
+            eos_token_id=None,
+        )
+
+        transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config)
+        ov_stateful_outputs = ov_model_stateful.generate(**tokens, generation_config=gen_config)
+        self.assertTrue(torch.allclose(ov_stateful_outputs, transformers_outputs))
+        ov_stateless_outputs = ov_model_stateless.generate(**tokens, generation_config=gen_config)
+        self.assertTrue(torch.allclose(ov_stateless_outputs, transformers_outputs))
+        # beam sample
+        gen_config = GenerationConfig(
+            max_new_tokens=10,
+            min_new_tokens=10,
+            num_beams=4,
+            do_sample=True,
+            eos_token_id=None,
+            top_k=1,
+        )
+
+        transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config)
+        ov_stateful_outputs = ov_model_stateful.generate(**tokens, generation_config=gen_config)
+        self.assertTrue(torch.allclose(ov_stateful_outputs, transformers_outputs))
+        ov_stateless_outputs = ov_model_stateless.generate(**tokens, generation_config=gen_config)
+        self.assertTrue(torch.allclose(ov_stateless_outputs, transformers_outputs))
+
+        # group beam search
+        gen_config = GenerationConfig(
+            max_new_tokens=10,
+            min_new_tokens=10,
+            num_beams=4,
+            do_sample=False,
+            eos_token_id=None,
+            num_beam_groups=2,
+            diversity_penalty=0.0000001,
+        )
+
+        transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config)
+        ov_stateful_outputs = ov_model_stateful.generate(**tokens, generation_config=gen_config)
+        self.assertTrue(torch.allclose(ov_stateful_outputs, transformers_outputs))
+        ov_stateless_outputs = ov_model_stateless.generate(**tokens, generation_config=gen_config)
+        self.assertTrue(torch.allclose(ov_stateless_outputs, transformers_outputs))
+
+        # constrained beam search
+        force_word = "cat"
+        force_words_ids = [tokenizer([force_word], add_special_tokens=False).input_ids]
+        gen_config = GenerationConfig(
+            max_new_tokens=10,
+            min_new_tokens=10,
+            num_beams=4,
+            do_sample=False,
+            eos_token_id=None,
+            force_words_ids=force_words_ids,
+        )
+
+        transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config)
+        ov_stateful_outputs = ov_model_stateful.generate(**tokens, generation_config=gen_config)
+        self.assertTrue(torch.allclose(ov_stateful_outputs, transformers_outputs))
+        ov_stateless_outputs = ov_model_stateless.generate(**tokens, generation_config=gen_config)
+        self.assertTrue(torch.allclose(ov_stateless_outputs, transformers_outputs))
+

 class OVModelForMaskedLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (