From 133dd7a7373a3c9b5b0739497ecd266cfb6f70ae Mon Sep 17 00:00:00 2001 From: Casper Date: Thu, 21 Sep 2023 13:30:57 +0200 Subject: [PATCH] Fix usage of engine --- awq/modules/fused/attn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awq/modules/fused/attn.py b/awq/modules/fused/attn.py index 332b7163..73bedd26 100644 --- a/awq/modules/fused/attn.py +++ b/awq/modules/fused/attn.py @@ -219,7 +219,7 @@ def forward( xv = xv.view((bsz,) + self.attention_shapes["single_xv_view"]) past_key_value = (xk, xv) if use_cache else None - attention_weight = awq_inference_engine.single_query_attention( + attention_weight = ft_inference_engine.single_query_attention( xq, # query xk, # key xv, # value