ST-XX · pull · Dec 24, 2025 · Dec 24, 2025 · Dec 24, 2025 · Dec 24, 2025
diff --git a/.github/workflows/ci_image_update.yml b/.github/workflows/ci_image_update.yml
@@ -137,10 +137,19 @@ jobs:
       FASTDEPLOY_WHEEL_URL: ${{ needs.build_sm8090.outputs.wheel_path }}
       MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
 
+  stable_test:  # calls the reusable _stable_test.yml workflow
+    name: Run Stable Tests
+    needs: [clone,build_sm8090,ci_image_build]  # each listed job supplies one input under `with`
+    uses: ./.github/workflows/_stable_test.yml
+    with:
+      DOCKER_IMAGE: ${{ needs.ci_image_build.outputs.docker_name_precheck }}
+      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
+      FASTDEPLOY_WHEEL_URL: ${{ needs.build_sm8090.outputs.wheel_path }}
+      MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"  # NOTE(review): presumably a runner-local path - confirm
 
   publish_pre_check:
     name: Publish Docker Images Pre Check
-    needs: [ci_image_build, unittest_coverage,logprob_test,pre_ce_test,base_test]
+    needs: [ci_image_build,unittest_coverage,logprob_test,pre_ce_test,base_test,stable_test]
     runs-on: [self-hosted, Docker-Build]
     steps:
       - name: Images Uploading

diff --git a/.github/workflows/pr_build_and_test.yml b/.github/workflows/pr_build_and_test.yml
@@ -75,3 +75,13 @@ jobs:
       FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
       FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
       MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
+
+  stable_test:  # calls the reusable _stable_test.yml workflow
+    name: Run Stable Tests
+    needs: [clone,build]  # clone supplies the archive URL, build supplies the wheel path
+    uses: ./.github/workflows/_stable_test.yml
+    with:  # DOCKER_IMAGE below is a fixed registry tag, not a job output
+      DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
+      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
+      FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
+      MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"  # NOTE(review): presumably a runner-local path - confirm
diff --git a/fastdeploy/config.py b/fastdeploy/config.py
@@ -1789,6 +1789,12 @@ def postprocess(self):
                 # It will hang when real batch_size < tp_size
                 self.graph_opt_config.filter_capture_size(tp_size=self.parallel_config.tensor_parallel_size)
 
+        # On prefill-role instances, pin MTP speculative decoding to 1 speculative token and 1 model step
+        if self.speculative_config is not None and self.speculative_config.method == "mtp":
+            if self.scheduler_config.splitwise_role == "prefill":
+                self.speculative_config.num_speculative_tokens = 1
+                self.speculative_config.num_model_steps = 1
+
         if self.scheduler_config.splitwise_role == "mixed":
             self._disable_sequence_parallel_moe_if_needed("Mixed")
             self.model_config.moe_phase = MoEPhase(phase="prefill")

diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py
@@ -240,7 +240,7 @@ class EngineArgs:
     """
     Flag to enable prefix caching.
     """
-    enable_output_caching: bool = True
+    enable_output_caching: bool = False
     """
     Flag to enable kv cache for output tokens, only valid in V1 scheduler.
     """

diff --git a/tests/ce/stable_cases/launch_model.sh b/tests/ce/stable_cases/launch_model.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 MODEL_PATH="${1}/TP2"
-FD_API_PORT=${FD_API_PORT:-8000}
-FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8001}
-FD_METRICS_PORT=${FD_METRICS_PORT:-8002}
-FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8003}
+FD_API_PORT=${FD_API_PORT:-8180}  # each port keeps a caller-set value; the 818x number is only the fallback
+FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8181}  # passed to --engine-worker-queue-port
+FD_METRICS_PORT=${FD_METRICS_PORT:-8182}  # passed to --metrics-port
+FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8183}  # passed to --cache-queue-port
 
 
 
@@ -36,7 +36,6 @@ python -m fastdeploy.entrypoints.openai.api_server \
        --engine-worker-queue-port ${FD_ENGINE_QUEUE_PORT} \
        --metrics-port ${FD_METRICS_PORT} \
        --cache-queue-port ${FD_CACHE_QUEUE_PORT} \
-       --quantization wint8 \
        --max-model-len 32768 \
        --max-num-seqs 1 \
        --gpu-memory-utilization 0.9 \