From 1901eef05ee03ac361611a2e37154d5cd55761ab Mon Sep 17 00:00:00 2001
From: lizhenyun01 <1500424927@qq.com>
Date: Wed, 31 Dec 2025 16:03:35 +0800
Subject: [PATCH 1/2] [Feature] support rl_tp_degree

---
 fastdeploy/model_executor/layers/embeddings.py |  4 ++++
 fastdeploy/model_executor/layers/linear.py     | 17 ++++++++++++++++-
 fastdeploy/model_executor/layers/lm_head.py    |  8 ++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/model_executor/layers/embeddings.py b/fastdeploy/model_executor/layers/embeddings.py
index 5ae82efe4ca..1bba6b70abc 100644
--- a/fastdeploy/model_executor/layers/embeddings.py
+++ b/fastdeploy/model_executor/layers/embeddings.py
@@ -173,6 +173,10 @@ def __init__(
             )
             set_weight_attrs(self.embeddings.weight, {"output_dim": False})
             set_weight_attrs(self.embeddings.weight, {"weight_loader": self.weight_loader})
+            set_weight_attrs(
+                self.embeddings.weight,
+                {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}},
+            )
         else:
             # column cut embedding
             self.embeddings = nn.Embedding(
diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
index 49b25dc3d0c..107fa8bddba 100644
--- a/fastdeploy/model_executor/layers/linear.py
+++ b/fastdeploy/model_executor/layers/linear.py
@@ -454,9 +454,10 @@ def __init__(
         )
 
         if self.tp_size > 0:
+            _set_var_distributed(self.weight, split_axis=-1)
             if self.with_bias:
                 # col parallel
-                _set_var_distributed(self.bias, split_axis=1)
+                _set_var_distributed(self.bias, split_axis=0)
                 set_weight_attrs(
                     self.bias,
                     {
@@ -468,6 +469,14 @@
                         ),
                     },
                 )
+        # set_rl_tp_degree
+        set_weight_attrs(
+            self.weight, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+        )
+        if self.with_bias:
+            set_weight_attrs(
+                self.bias, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+            )
 
 
 class MergedColumnParallelLinear(ColumnParallelLinear):
@@ -896,6 +905,7 @@ def __init__(
             model_format=fd_config.model_config.model_format,
         )
         if self.tp_size > 1:
+            _set_var_distributed(self.weight, split_axis=0)
             create_weight_kwargs["split_axis"] = 0
             create_weight_kwargs["is_distributed"] = True
         self.quant_method.create_weights(**create_weight_kwargs)
@@ -906,6 +916,11 @@ def __init__(
         assert with_bias, "with_bias must be True when add_bias is True."
         if self.tp_size > 1 and self.reduce_results:
             set_weight_attrs(self.bias, {"tp_row_bias": True})
+
+        # set_rl_tp_degree
+        set_weight_attrs(
+            self.weight, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+        )
 
     def all2all_transpose(self, x: paddle.Tensor) -> paddle.Tensor:
         token_num = x.shape[0]
diff --git a/fastdeploy/model_executor/layers/lm_head.py b/fastdeploy/model_executor/layers/lm_head.py
index a7bff3905b0..04175c810c3 100644
--- a/fastdeploy/model_executor/layers/lm_head.py
+++ b/fastdeploy/model_executor/layers/lm_head.py
@@ -105,6 +105,11 @@ def __init__(
             if self.tp_size > 1:
                 if with_bias:
                     set_weight_attrs(self.linear.bias, {"output_dim": True})
+                if self.bias_key is not None:
+                    set_weight_attrs(
+                        self.linear.bias,
+                        {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}},
+                    )
 
         else:
             self.linear = RowParallelLinear(
@@ -123,6 +128,9 @@ def __init__(
                 },
             )
             set_weight_attrs(self.linear.weight, {"output_dim": False})
+            set_weight_attrs(
+                self.linear.weight, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+            )
 
     def load_state_dict(self, state_dict: Dict[str, paddle.Tensor | np.ndarray]):
         """

From 7460ca1e043a3100df9bd94af427d2d93bf07813 Mon Sep 17 00:00:00 2001
From: lizhenyun01 <1500424927@qq.com>
Date: Wed, 31 Dec 2025 16:17:16 +0800
Subject: [PATCH 2/2] fix code-style

---
 fastdeploy/model_executor/layers/linear.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
index 107fa8bddba..e137cd1bed9 100644
--- a/fastdeploy/model_executor/layers/linear.py
+++ b/fastdeploy/model_executor/layers/linear.py
@@ -916,7 +916,7 @@ def __init__(
         assert with_bias, "with_bias must be True when add_bias is True."
         if self.tp_size > 1 and self.reduce_results:
             set_weight_attrs(self.bias, {"tp_row_bias": True})
-
+
         # set_rl_tp_degree
         set_weight_attrs(
            self.weight, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
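
Note: below is a minimal, self-contained sketch of the attribute-tagging pattern these patches rely on. set_weight_attrs is re-implemented here as a stand-in for FastDeploy's setattr-based helper, and rollout_tp_degree is a hypothetical consumer on the RL side; only the attribute names rl_need_attr and rl_tp_degree are taken from the patches themselves.

# Sketch of the weight-attribute pattern, under the assumptions stated above.
from types import SimpleNamespace
from typing import Any, Dict


def set_weight_attrs(weight: Any, attrs: Dict[str, Any]) -> None:
    # Attach each entry of `attrs` to the parameter object via setattr,
    # mirroring the helper these patches call.
    for key, value in attrs.items():
        setattr(weight, key, value)


def rollout_tp_degree(weight: Any, default: int = 1) -> int:
    # Hypothetical consumer: recover the tensor-parallel degree recorded at
    # load time, so an RL trainer knows how many shards the weight was cut into.
    rl_attr = getattr(weight, "rl_need_attr", None)
    return rl_attr["rl_tp_degree"] if rl_attr is not None else default


if __name__ == "__main__":
    weight = SimpleNamespace()  # stands in for a paddle Parameter
    tensor_parallel_size = 4  # stands in for fd_config.parallel_config.tensor_parallel_size
    set_weight_attrs(weight, {"rl_need_attr": {"rl_tp_degree": tensor_parallel_size}})
    assert rollout_tp_degree(weight) == 4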