From 2dc2ba49b5aa31568f0895be0f106d462e0da90a Mon Sep 17 00:00:00 2001 From: freeliuzc Date: Wed, 24 Dec 2025 18:45:54 +0800 Subject: [PATCH 1/5] [Speculative Decoding] Fix multistep MTP in splitwise-prefill mode (#5723) --- fastdeploy/config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fastdeploy/config.py b/fastdeploy/config.py index 4c3f262ff28..9170dfb4c8a 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -1789,6 +1789,12 @@ def postprocess(self): # It will hang when real batch_size < tp_size self.graph_opt_config.filter_capture_size(tp_size=self.parallel_config.tensor_parallel_size) + # adjust speculative config + if self.speculative_config is not None and self.speculative_config.method == "mtp": + if self.scheduler_config.splitwise_role == "prefill": + self.speculative_config.num_speculative_tokens = 1 + self.speculative_config.num_model_steps = 1 + if self.scheduler_config.splitwise_role == "mixed": self._disable_sequence_parallel_moe_if_needed("Mixed") self.model_config.moe_phase = MoEPhase(phase="prefill") From 0410c42a9a057f1ebdc45536d9fb898ba16e0fc5 Mon Sep 17 00:00:00 2001 From: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Date: Wed, 24 Dec 2025 19:18:00 +0800 Subject: [PATCH 2/5] [CI] Refactor RL tests to reuse stable_test (#5516) * [CI] Refactor RL tests to reuse stable_test --- .github/workflows/ci_image_update.yml | 11 ++++++++++- .github/workflows/pr_build_and_test.yml | 10 ++++++++++ tests/ce/stable_cases/launch_model.sh | 9 ++++----- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci_image_update.yml b/.github/workflows/ci_image_update.yml index 7e6544e6364..cbf3def32b9 100644 --- a/.github/workflows/ci_image_update.yml +++ b/.github/workflows/ci_image_update.yml @@ -137,10 +137,19 @@ jobs: FASTDEPLOY_WHEEL_URL: ${{ needs.build_sm8090.outputs.wheel_path }} MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" + stable_test: + name: Run Stable Tests + needs: [ clone,build ] + 
uses: ./.github/workflows/_stable_test.yml + with: + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate + FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} + FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" publish_pre_check: name: Publish Docker Images Pre Check - needs: [ci_image_build, unittest_coverage,logprob_test,pre_ce_test,base_test] + needs: [ci_image_build,unittest_coverage,logprob_test,pre_ce_test,base_test,stable_test] runs-on: [self-hosted, Docker-Build] steps: - name: Images Uploading diff --git a/.github/workflows/pr_build_and_test.yml b/.github/workflows/pr_build_and_test.yml index 5abd24966d8..da1630e07cf 100644 --- a/.github/workflows/pr_build_and_test.yml +++ b/.github/workflows/pr_build_and_test.yml @@ -75,3 +75,13 @@ jobs: FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" + + stable_test: + name: Run Stable Tests + needs: [clone,build] + uses: ./.github/workflows/_stable_test.yml + with: + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate + FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} + FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" diff --git a/tests/ce/stable_cases/launch_model.sh b/tests/ce/stable_cases/launch_model.sh index 3b758a15a2a..570b37d6569 100644 --- a/tests/ce/stable_cases/launch_model.sh +++ b/tests/ce/stable_cases/launch_model.sh @@ -1,9 +1,9 @@ #!/bin/bash MODEL_PATH="${1}/TP2" -FD_API_PORT=${FD_API_PORT:-8000} -FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8001} -FD_METRICS_PORT=${FD_METRICS_PORT:-8002} -FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8003} +FD_API_PORT=${FD_API_PORT:-8180} 
+FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8181} +FD_METRICS_PORT=${FD_METRICS_PORT:-8182} +FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8183} @@ -36,7 +36,6 @@ python -m fastdeploy.entrypoints.openai.api_server \ --engine-worker-queue-port ${FD_ENGINE_QUEUE_PORT} \ --metrics-port ${FD_METRICS_PORT} \ --cache-queue-port ${FD_CACHE_QUEUE_PORT} \ - --quantization wint8 \ --max-model-len 32768 \ --max-num-seqs 1 \ --gpu-memory-utilization 0.9 \ From 6e39f88ca07b3047dc2ae78d07e0aaf3b926dbea Mon Sep 17 00:00:00 2001 From: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Date: Wed, 24 Dec 2025 21:28:38 +0800 Subject: [PATCH 3/5] [CI] Fix ci_image_update error of no depends --- .github/workflows/ci_image_update.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_image_update.yml b/.github/workflows/ci_image_update.yml index cbf3def32b9..bc40f71ced0 100644 --- a/.github/workflows/ci_image_update.yml +++ b/.github/workflows/ci_image_update.yml @@ -139,7 +139,7 @@ jobs: stable_test: name: Run Stable Tests - needs: [ clone,build ] + needs: [clone,build_sm8090,ci_image_build] uses: ./.github/workflows/_stable_test.yml with: DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate From b90a922f98f5d4485976d24f70b56909e7b8f2c1 Mon Sep 17 00:00:00 2001 From: chenjian <1435317881@qq.com> Date: Wed, 24 Dec 2025 21:37:24 +0800 Subject: [PATCH 4/5] [Bug fix] Set enable_cache_output as false by default (#5751) --- fastdeploy/engine/args_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py index d14ad8897c4..96c4c57f5c7 100644 --- a/fastdeploy/engine/args_utils.py +++ b/fastdeploy/engine/args_utils.py @@ -240,7 +240,7 @@ class EngineArgs: """ Flag to enable prefix caching. 
""" - enable_output_caching: bool = True + enable_output_caching: bool = False """ Flag to enable kv cache for output tokens, only valid in V1 scheduler. """ From 9624bf3c6e18cff20334d3e4170bdec4bc7971e9 Mon Sep 17 00:00:00 2001 From: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Date: Wed, 24 Dec 2025 22:44:34 +0800 Subject: [PATCH 5/5] [CI] Fix image build to use the correct upstream artifacts --- .github/workflows/ci_image_update.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci_image_update.yml b/.github/workflows/ci_image_update.yml index bc40f71ced0..da1256e204c 100644 --- a/.github/workflows/ci_image_update.yml +++ b/.github/workflows/ci_image_update.yml @@ -142,9 +142,9 @@ jobs: needs: [clone,build_sm8090,ci_image_build] uses: ./.github/workflows/_stable_test.yml with: - DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate + DOCKER_IMAGE: ${{ needs.ci_image_build.outputs.docker_name_precheck }} FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} - FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + FASTDEPLOY_WHEEL_URL: ${{ needs.build_sm8090.outputs.wheel_path }} MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" publish_pre_check: