Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion .github/workflows/ci_image_update.yml
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,19 @@ jobs:
FASTDEPLOY_WHEEL_URL: ${{ needs.build_sm8090.outputs.wheel_path }}
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"

stable_test:
name: Run Stable Tests
needs: [clone,build_sm8090,ci_image_build]
uses: ./.github/workflows/_stable_test.yml
with:
DOCKER_IMAGE: ${{ needs.ci_image_build.outputs.docker_name_precheck }}
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
FASTDEPLOY_WHEEL_URL: ${{ needs.build_sm8090.outputs.wheel_path }}
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"

publish_pre_check:
name: Publish Docker Images Pre Check
needs: [ci_image_build, unittest_coverage,logprob_test,pre_ce_test,base_test]
needs: [ci_image_build,unittest_coverage,logprob_test,pre_ce_test,base_test,stable_test]
runs-on: [self-hosted, Docker-Build]
steps:
- name: Images Uploading
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/pr_build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,13 @@ jobs:
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"

stable_test:
name: Run Stable Tests
needs: [clone,build]
uses: ./.github/workflows/_stable_test.yml
with:
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
6 changes: 6 additions & 0 deletions fastdeploy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1789,6 +1789,12 @@ def postprocess(self):
# It will hang when real batch_size < tp_size
self.graph_opt_config.filter_capture_size(tp_size=self.parallel_config.tensor_parallel_size)

# adjust speculative config
if self.speculative_config is not None and self.speculative_config.method == "mtp":
if self.scheduler_config.splitwise_role == "prefill":
self.speculative_config.num_speculative_tokens = 1
self.speculative_config.num_model_steps = 1

if self.scheduler_config.splitwise_role == "mixed":
self._disable_sequence_parallel_moe_if_needed("Mixed")
self.model_config.moe_phase = MoEPhase(phase="prefill")
Expand Down
2 changes: 1 addition & 1 deletion fastdeploy/engine/args_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ class EngineArgs:
"""
Flag to enable prefix caching.
"""
enable_output_caching: bool = True
enable_output_caching: bool = False
"""
Flag to enable kv cache for output tokens, only valid in V1 scheduler.
"""
Expand Down
9 changes: 4 additions & 5 deletions tests/ce/stable_cases/launch_model.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/bin/bash
MODEL_PATH="${1}/TP2"
FD_API_PORT=${FD_API_PORT:-8000}
FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8001}
FD_METRICS_PORT=${FD_METRICS_PORT:-8002}
FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8003}
FD_API_PORT=${FD_API_PORT:-8180}
FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8181}
FD_METRICS_PORT=${FD_METRICS_PORT:-8182}
FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8183}



Expand Down Expand Up @@ -36,7 +36,6 @@ python -m fastdeploy.entrypoints.openai.api_server \
--engine-worker-queue-port ${FD_ENGINE_QUEUE_PORT} \
--metrics-port ${FD_METRICS_PORT} \
--cache-queue-port ${FD_CACHE_QUEUE_PORT} \
--quantization wint8 \
--max-model-len 32768 \
--max-num-seqs 1 \
--gpu-memory-utilization 0.9 \
Expand Down
Loading