Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions .github/workflows/_base_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -233,14 +233,28 @@ jobs:

curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-H "Content-Type: application/json" \
-d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"ernie45t_21b_sot.yaml\", \"--enable-logprob\": \"False\"}"
-d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"ernie45t_21b_sot_wint4.yaml\", \"--enable-logprob\": \"False\"}"
check_service 360
export TEMPLATE=TOKEN_NORMAL
python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1

curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-H "Content-Type: application/json" \
-d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"ernie45t_21b_cinn.yaml\", \"--enable-logprob\": \"False\"}"
-d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"ernie45t_21b_cinn_wint4.yaml\", \"--enable-logprob\": \"False\"}"
check_service 360
export TEMPLATE=TOKEN_NORMAL
python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1

curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-H "Content-Type: application/json" \
-d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"ernie45t_21b_sot_fp8.yaml\", \"--enable-logprob\": \"False\"}"
check_service 360
export TEMPLATE=TOKEN_NORMAL
python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1

curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-H "Content-Type: application/json" \
-d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"ernie45t_21b_cinn_fp8.yaml\", \"--enable-logprob\": \"False\"}"
check_service 360
export TEMPLATE=TOKEN_NORMAL
python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1
Expand Down
5 changes: 5 additions & 0 deletions custom_ops/gpu_ops/update_inputs_v1.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ __global__ void update_inputs_kernel_v1(bool* not_need_stop,
}
if (thread_idx < bsz) {
if (stop_flag_now) {
// chunked prefill (e.g. when max_tokens=1): prompt not fully consumed yet,
// so invalidate the sampled token id for this slot
if (seq_lens_this_time[thread_idx] + seq_lens_decoder[thread_idx] <
prompt_lens[thread_idx]) {
topk_ids[thread_idx] = -1;
}
seq_lens_this_time[thread_idx] = 0; // stop at next step
seq_lens_decoder[thread_idx] = 0;
seq_lens_encoder[thread_idx] = 0;
Expand Down
15 changes: 14 additions & 1 deletion fastdeploy/distributed/communication.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import paddle.distributed as dist
from paddle.distributed import fleet

from fastdeploy.utils import register_custom_python_op

_TP_AR = None


Expand Down Expand Up @@ -50,7 +52,18 @@ def custom_ar_clear_ipc_handles():

try:

@paddle.jit.marker.unified
def tensor_model_parallel_all_reduce_infer_meta(x: "paddle.static.MetaTensor", group_) -> paddle.static.MetaTensor:
return paddle.static.MetaTensor(shape=x.shape, dtype=x.dtype)

@register_custom_python_op(
name="tensor_model_parallel_all_reduce",
infer_meta=tensor_model_parallel_all_reduce_infer_meta,
input_names=[
"input_",
],
output_names=["out"],
inplace_map={},
)
def tensor_model_parallel_all_reduce(
input_: paddle.Tensor,
group_: paddle.distributed.communication.group.Group = None,
Expand Down
Loading
Loading