@@ -8,14 +8,15 @@
 from transformers import PreTrainedTokenizer

 from vllm.config import DecodingConfig, ModelConfig
+from vllm.core.scheduler import SchedulerOutputs
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.llm_engine import LLMEngine
 from vllm.executor.ray_utils import initialize_ray_cluster, ray
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.outputs import RequestOutput
 from vllm.sampling_params import SamplingParams
-from vllm.sequence import MultiModalData
+from vllm.sequence import MultiModalData, SamplerOutput
 from vllm.usage.usage_lib import UsageContext

 logger = init_logger(__name__)
@@ -224,8 +225,7 @@ async def step_async(self) -> List[RequestOutput]:
             scheduler_outputs.ignored_seq_groups, seq_group_metadata_list)

         # Log stats.
-        if self.log_stats:
-            self.stat_logger.log(self._get_stats(scheduler_outputs))
+        self.do_log_stats(scheduler_outputs, output)

         return request_outputs

@@ -707,9 +707,13 @@ async def get_decoding_config(self) -> DecodingConfig:
         else:
             return self.engine.get_decoding_config()

-    async def do_log_stats(self) -> None:
+    async def do_log_stats(
+            self,
+            scheduler_outputs: Optional[SchedulerOutputs] = None,
+            model_output: Optional[List[SamplerOutput]] = None) -> None:
         if self.engine_use_ray:
-            await self.engine.do_log_stats.remote()  # type: ignore
+            await self.engine.do_log_stats.remote(  # type: ignore
+                scheduler_outputs, model_output)
         else:
             self.engine.do_log_stats()

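Both new parameters default to None, so existing no-argument call sites keep working while step_async forwards the scheduler and sampler outputs it already has in hand. A minimal sketch of the backward-compatible call pattern, assuming an already-initialized AsyncLLMEngine bound to a variable named engine (the helper and its loop are illustrative, not part of this change):

import asyncio

async def periodic_stats(engine, interval_s: float = 10.0) -> None:
    # Sketch only: `engine` is assumed to be a running AsyncLLMEngine.
    # Because scheduler_outputs and model_output default to None, calling
    # do_log_stats() with no arguments behaves exactly as before this diff.
    while True:
        await engine.do_log_stats()
        await asyncio.sleep(interval_s)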