Skip to content

Commit 755c4fd

Browse files
WANDY666 and wanzihao authored
fix: intxweight -> wxa16, b_ready_cache_len -> infer_state.b_ready_cache_len (#371)
Co-authored-by: wanzihao <wanzihao@sensetime.com>
1 parent c3dc640 commit 755c4fd

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

lightllm/common/basemodel/basemodel.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ def __init__(self, kvargs):
3838
self.weight_dir_ = kvargs["weight_dir"]
3939
self.max_total_token_num = kvargs["max_total_token_num"]
4040
self.load_way = kvargs.get("load_way", "HF")
41-
self.mode = kvargs.get("mode", [])
41+
self.mode = [m.replace('int4weight', 'w4a16').replace('int8weight', 'w8a16') for m in kvargs.get("mode", [])]
4242
self.weight_dict = kvargs.get("weight_dict", None)
4343
self.finetune_config = kvargs.get("finetune_config", None)
4444
self.max_req_num = kvargs.get("max_req_num", 1000)
@@ -231,7 +231,7 @@ def _prefill(
231231
self.req_manager.req_to_token_indexs,
232232
b_req_idx,
233233
b_seq_len,
234-
b_ready_cache_len,
234+
infer_state.b_ready_cache_len,
235235
max_len_in_batch,
236236
infer_state.mem_index,
237237
)

0 commit comments

Comments
 (0)