Skip to content

Commit

Permalink
add use_cache for rope
Browse files: browse the repository at this point in the history
  • Loading branch information
faaany committed Jun 3, 2024
1 parent 14673da commit a2a969e
Showing 1 changed file with 21 additions and 20 deletions.
41 changes: 21 additions & 20 deletions optimum/exporters/ipex/modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,25 +172,26 @@ def qkv_gemm(self, hidden_states):

return query, key, value

def rope(self, query, key, kv_seq_len, position_ids):
key = self.ipex_rope(
key,
position_ids,
self.num_key_value_heads,
self.head_dim,
self.head_dim // 2,
self.head_dim,
kv_seq_len,
)
query = self.ipex_rope(
query,
position_ids,
self.num_heads,
self.head_dim,
self.head_dim // 2,
self.head_dim,
kv_seq_len,
)
def rope(self, query, key, kv_seq_len, position_ids, use_cache):
# Passes `key` and `query` through the `self.ipex_rope` helper and returns
# them as a `(query, key)` tuple. `use_cache` gates the helper call(s).
# NOTE(review): leading indentation is missing in this rendering, so it is
# not visible here whether only the key call or both calls are nested under
# `if use_cache:` -- confirm against the original file before relying on it.
if use_cache:
# Key projection: same argument layout as the query call below, but with
# the key/value head count (`self.num_key_value_heads`).
key = self.ipex_rope(
key,
position_ids,
self.num_key_value_heads,
self.head_dim,
# presumably the rotation offset (half of the head dim) -- TODO confirm
# against the signature of the object bound to `self.ipex_rope`
self.head_dim // 2,
# presumably the number of rotary dims (full head dim) -- TODO confirm
self.head_dim,
kv_seq_len,
)
# Query projection: uses the attention head count (`self.num_heads`).
query = self.ipex_rope(
query,
position_ids,
self.num_heads,
self.head_dim,
self.head_dim // 2,
self.head_dim,
kv_seq_len,
)
# Note the return order: query first, then key.
return query, key

def sdpa(self, query, key, value, past_key_value, attention_mask, use_cache):
Expand Down Expand Up @@ -265,7 +266,7 @@ def forward(
kv_seq_len = seq_len + past_key_value[0].size(-2) if past_key_value is not None else seq_len

query, key, value = self.qkv_gemm(hidden_states)
query, key = self.rope(query, key, kv_seq_len, position_ids)
query, key = self.rope(query, key, kv_seq_len, position_ids, use_cache)

attn_output, past_key_value, attn_weights = self.sdpa(
query, key, value, past_key_value, attention_mask, use_cache
Expand Down

0 comments on commit a2a969e

Please sign in to comment.