Commit 354cee2 (parent: c16e7b8)
Author: baishihao

    fix chunked prefill

2 files changed: +5 −3 lines

lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl.py (+3, −2)

```diff
@@ -15,8 +15,9 @@
 
 
 class ChunkedPrefillBackend(ModeBackend):
-    def __init__(self) -> None:
+    def __init__(self, is_multimodal) -> None:
         super().__init__()
+        self.is_multimodal = is_multimodal
         self.forward_step = 0
         args = get_env_start_args()
         self.max_wait_step = args.router_max_wait_tokens
@@ -31,7 +32,7 @@ def decode(self):
         self.forward_batch(kwargs, run_reqs)
         if len(run_reqs) == 0 or self.forward_step % self.max_wait_step == 0:
             # run prefill
-            kwargs, run_reqs = prepare_prefill_inputs(g_infer_context.infer_req_ids)
+            kwargs, run_reqs = prepare_prefill_inputs(g_infer_context.infer_req_ids, self.is_multimodal)
             self.forward_batch(kwargs, run_reqs)
         self.forward_step += 1
         return
```
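
For context, a minimal sketch of what this call-site change implies, assuming `prepare_prefill_inputs` branches on the flag to gather multimodal parameters. Only the identifiers `prepare_prefill_inputs` and `is_multimodal` come from the diff; the body and return shapes below are hypothetical stand-ins for lightllm internals, not the real implementation:

```python
# Minimal sketch, NOT lightllm's real code: only the names
# prepare_prefill_inputs and is_multimodal are taken from the diff;
# everything else is an assumption for illustration.
from typing import Any, Dict, List, Tuple

def prepare_prefill_inputs(
    req_ids: List[int], is_multimodal: bool = False
) -> Tuple[Dict[str, Any], List[int]]:
    kwargs: Dict[str, Any] = {"input_ids": list(req_ids)}  # placeholder token inputs
    if is_multimodal:
        # Assumed behavior: multimodal requests must also carry their
        # image/audio parameters into the prefill forward pass; before this
        # commit the chunked-prefill backend never asked for them.
        kwargs["multimodal_params"] = [{"req_id": r} for r in req_ids]
    return kwargs, req_ids

# The fixed decode() path now calls, in effect:
kwargs, run_reqs = prepare_prefill_inputs([0, 1], is_multimodal=True)
assert "multimodal_params" in kwargs
```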

lightllm/server/router/model_infer/model_rpc.py (+2, −1)

```diff
@@ -123,13 +123,14 @@ def init_model(self, kvargs):
         is_xgrammar_constraint_mode = False
         is_prefill_node = False
         is_decode_node = False
+        is_multimodal = kvargs.get("enable_multimodal", False)
         # use_dynamic_prompt_cache = kvargs.get("use_dynamic_prompt_cache", False)
         if is_prefill_node:
             self.backend = ContinuesBatchBackendForPrefillNode(self.info_queue, self.mem_queue)
         elif is_decode_node:
             self.backend = ContinuesBatchBackendForDecodeNode(self.info_queue, self.mem_queue)
         elif enable_chunked_prefill:
-            self.backend = ChunkedPrefillBackend()
+            self.backend = ChunkedPrefillBackend(is_multimodal)
         elif use_reward_model:
             self.backend = RewardModelBackend()
         elif return_all_prompt_logprobs:
```
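
Putting the two files together, a condensed, hypothetical view of how the flag now flows from the start-up `kvargs` to the backend. `ChunkedPrefillBackend`, `is_multimodal`, and the `"enable_multimodal"` key are from the commit; the surrounding scaffolding is assumed:

```python
# Condensed, hypothetical wiring that mirrors the diff; not lightllm's
# actual init_model, which dispatches among many more backends.
class ChunkedPrefillBackend:
    def __init__(self, is_multimodal: bool) -> None:
        # Stored here, then forwarded to prepare_prefill_inputs in decode().
        self.is_multimodal = is_multimodal

def init_model(kvargs: dict, enable_chunked_prefill: bool = True):
    # The fix: read the multimodal flag once at init time...
    is_multimodal = kvargs.get("enable_multimodal", False)
    if enable_chunked_prefill:
        # ...and hand it to the chunked-prefill backend instead of
        # constructing the backend with no knowledge of multimodal inputs.
        return ChunkedPrefillBackend(is_multimodal)
    raise NotImplementedError("other backends elided from this sketch")

backend = init_model({"enable_multimodal": True})
print(backend.is_multimodal)  # -> True
```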
