Commit 354cee2
baishihao committed
1 parent: c16e7b8

2 files changed: +5 -3 lines

File tree:
  lightllm/server/router/model_infer
    mode_backend/chunked_prefill
@@ -17,6 +17,7 @@
 class ChunkedPrefillBackend(ModeBackend):
-    def __init__(self) -> None:
+    def __init__(self, is_multimodal) -> None:
         super().__init__()
+        self.is_multimodal = is_multimodal
         self.forward_step = 0
         args = get_env_start_args()
         self.max_wait_step = args.router_max_wait_tokens
@@ -31,7 +32,7 @@ def decode(self):
             self.forward_batch(kwargs, run_reqs)
         if len(run_reqs) == 0 or self.forward_step % self.max_wait_step == 0:
             # run prefill
-            kwargs, run_reqs = prepare_prefill_inputs(g_infer_context.infer_req_ids)
+            kwargs, run_reqs = prepare_prefill_inputs(g_infer_context.infer_req_ids, self.is_multimodal)
             self.forward_batch(kwargs, run_reqs)
         self.forward_step += 1
         return
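For context, a minimal self-contained sketch of the pattern this hunk introduces: the backend keeps the multimodal flag from construction time and forwards it whenever prefill inputs are built. The names ChunkedPrefillBackendSketch, run_prefill, and the simplified prepare_prefill_inputs signature below are illustrative stand-ins, not lightllm's real implementations.

# Sketch only: simplified stand-ins, not lightllm's actual classes.
def prepare_prefill_inputs(req_ids, is_multimodal=False):
    # stand-in: a real multimodal prefill would also collect image inputs here
    kwargs = {"req_ids": list(req_ids), "is_multimodal": is_multimodal}
    return kwargs, list(req_ids)

class ChunkedPrefillBackendSketch:
    def __init__(self, is_multimodal) -> None:
        self.is_multimodal = is_multimodal  # stored once, reused on every prefill step

    def run_prefill(self, infer_req_ids):
        # the flag is threaded through exactly like in the diff above
        kwargs, run_reqs = prepare_prefill_inputs(infer_req_ids, self.is_multimodal)
        return kwargs, run_reqs

kwargs, _ = ChunkedPrefillBackendSketch(is_multimodal=True).run_prefill([0, 1, 2])
print(kwargs)  # {'req_ids': [0, 1, 2], 'is_multimodal': True}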
@@ -123,13 +123,14 @@ def init_model(self, kvargs):
         is_xgrammar_constraint_mode = False
         is_prefill_node = False
         is_decode_node = False
+        is_multimodal = kvargs.get("enable_multimodal", False)
         # use_dynamic_prompt_cache = kvargs.get("use_dynamic_prompt_cache", False)
         if is_prefill_node:
             self.backend = ContinuesBatchBackendForPrefillNode(self.info_queue, self.mem_queue)
         elif is_decode_node:
             self.backend = ContinuesBatchBackendForDecodeNode(self.info_queue, self.mem_queue)
         elif enable_chunked_prefill:
-            self.backend = ChunkedPrefillBackend()
+            self.backend = ChunkedPrefillBackend(is_multimodal)
         elif use_reward_model:
             self.backend = RewardModelBackend()
         elif return_all_prompt_logprobs:
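A hedged sketch of the init_model() side of the change: the flag is read from the kvargs dict under "enable_multimodal" with a default of False and handed to the chunked-prefill backend. The pick_backend helper and the stub ChunkedPrefillBackend below are assumptions for illustration; only the chunked-prefill branch of the real dispatch is shown.

# Sketch only: a stub backend and a hypothetical dispatch helper.
class ChunkedPrefillBackend:
    def __init__(self, is_multimodal) -> None:
        self.is_multimodal = is_multimodal

def pick_backend(kvargs, enable_chunked_prefill=True):
    # reading the flag with a default keeps callers that never set
    # "enable_multimodal" on the previous (text-only) behaviour
    is_multimodal = kvargs.get("enable_multimodal", False)
    if enable_chunked_prefill:
        return ChunkedPrefillBackend(is_multimodal)
    raise NotImplementedError("other backends are elided in this sketch")

backend = pick_backend({"enable_multimodal": True})
assert backend.is_multimodal is True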