diff --git a/fastdeploy/entrypoints/openai/serving_completion.py b/fastdeploy/entrypoints/openai/serving_completion.py index fd86fe273cb..56a8980882b 100644 --- a/fastdeploy/entrypoints/openai/serving_completion.py +++ b/fastdeploy/entrypoints/openai/serving_completion.py @@ -545,7 +545,9 @@ async def completion_stream_generator( reasoning_content="", arrival_time=arrival_time, logprobs=logprobs_res, - prompt_logprobs=clamp_prompt_logprobs(prompt_logprobs_res), + prompt_logprobs=( + clamp_prompt_logprobs(prompt_logprobs_res) if not request.return_token_ids else None + ), draft_logprobs=draft_logprobs_res, speculate_metrics=output_speculate_metrics, ) diff --git a/fastdeploy/utils.py b/fastdeploy/utils.py index 9352e6ab881..2796b931c2f 100644 --- a/fastdeploy/utils.py +++ b/fastdeploy/utils.py @@ -1154,7 +1154,7 @@ def _bos_download(bos_client, link): console_logger = get_logger("console", "console.log", print_to_console=True) spec_logger = get_logger("speculate", "speculate.log") zmq_client_logger = get_logger("zmq_client", "zmq_client.log") -trace_logger = FastDeployLogger().get_trace_logger("trace_logger", "trace_logger.log") +trace_logger = FastDeployLogger().get_trace_logger("trace", "trace.log") router_logger = get_logger("router", "router.log") fmq_logger = get_logger("fmq", "fmq.log")