From 1901eef05ee03ac361611a2e37154d5cd55761ab Mon Sep 17 00:00:00 2001
From: lizhenyun01 <1500424927@qq.com>
Date: Wed, 31 Dec 2025 16:03:35 +0800
Subject: [PATCH 1/2] [Feature] support rl_tp_degree

---
 fastdeploy/model_executor/layers/embeddings.py |  4 ++++
 fastdeploy/model_executor/layers/linear.py     | 17 ++++++++++++++++-
 fastdeploy/model_executor/layers/lm_head.py    |  8 ++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/model_executor/layers/embeddings.py b/fastdeploy/model_executor/layers/embeddings.py
index 5ae82efe4ca..1bba6b70abc 100644
--- a/fastdeploy/model_executor/layers/embeddings.py
+++ b/fastdeploy/model_executor/layers/embeddings.py
@@ -173,6 +173,10 @@ def __init__(
             )
             set_weight_attrs(self.embeddings.weight, {"output_dim": False})
             set_weight_attrs(self.embeddings.weight, {"weight_loader": self.weight_loader})
+            set_weight_attrs(
+                self.embeddings.weight,
+                {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}},
+            )
         else:
             # column cut embedding
             self.embeddings = nn.Embedding(
diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
index 49b25dc3d0c..107fa8bddba 100644
--- a/fastdeploy/model_executor/layers/linear.py
+++ b/fastdeploy/model_executor/layers/linear.py
@@ -454,9 +454,10 @@ def __init__(
         )
 
         if self.tp_size > 0:
+            _set_var_distributed(self.weight, split_axis=-1)
             if self.with_bias:
                 # col parallel
-                _set_var_distributed(self.bias, split_axis=1)
+                _set_var_distributed(self.bias, split_axis=0)
                 set_weight_attrs(
                     self.bias,
                     {
@@ -468,6 +469,14 @@
                         ),
                     },
                 )
+        # set_rl_tp_degree
+        set_weight_attrs(
+            self.weight, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+        )
+        if self.with_bias:
+            set_weight_attrs(
+                self.bias, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+            )
 
 
 class MergedColumnParallelLinear(ColumnParallelLinear):
@@ -896,6 +905,7 @@ def __init__(
             model_format=fd_config.model_config.model_format,
         )
         if self.tp_size > 1:
+            _set_var_distributed(self.weight, split_axis=0)
             create_weight_kwargs["split_axis"] = 0
             create_weight_kwargs["is_distributed"] = True
         self.quant_method.create_weights(**create_weight_kwargs)
@@ -906,6 +916,11 @@ def __init__(
         assert with_bias, "with_bias must be True when add_bias is True."
         if self.tp_size > 1 and self.reduce_results:
             set_weight_attrs(self.bias, {"tp_row_bias": True})
+
+        # set_rl_tp_degree
+        set_weight_attrs(
+            self.weight, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+        )
 
     def all2all_transpose(self, x: paddle.Tensor) -> paddle.Tensor:
         token_num = x.shape[0]
diff --git a/fastdeploy/model_executor/layers/lm_head.py b/fastdeploy/model_executor/layers/lm_head.py
index a7bff3905b0..04175c810c3 100644
--- a/fastdeploy/model_executor/layers/lm_head.py
+++ b/fastdeploy/model_executor/layers/lm_head.py
@@ -105,6 +105,11 @@ def __init__(
             if self.tp_size > 1:
                 if with_bias:
                     set_weight_attrs(self.linear.bias, {"output_dim": True})
+                if self.bias_key is not None:
+                    set_weight_attrs(
+                        self.linear.bias,
+                        {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}},
+                    )
 
         else:
             self.linear = RowParallelLinear(
@@ -123,6 +128,9 @@ def __init__(
                 },
             )
             set_weight_attrs(self.linear.weight, {"output_dim": False})
+            set_weight_attrs(
+                self.linear.weight, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+            )
 
     def load_state_dict(self, state_dict: Dict[str, paddle.Tensor | np.ndarray]):
         """

From 7460ca1e043a3100df9bd94af427d2d93bf07813 Mon Sep 17 00:00:00 2001
From: lizhenyun01 <1500424927@qq.com>
Date: Wed, 31 Dec 2025 16:17:16 +0800
Subject: [PATCH 2/2] fix code-style

---
 fastdeploy/model_executor/layers/linear.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
index 107fa8bddba..e137cd1bed9 100644
--- a/fastdeploy/model_executor/layers/linear.py
+++ b/fastdeploy/model_executor/layers/linear.py
@@ -916,7 +916,7 @@ def __init__(
         assert with_bias, "with_bias must be True when add_bias is True."
         if self.tp_size > 1 and self.reduce_results:
             set_weight_attrs(self.bias, {"tp_row_bias": True})
-
+
         # set_rl_tp_degree
         set_weight_attrs(
            self.weight, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
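
Note: below is a minimal, self-contained sketch of the attribute-tagging pattern these patches rely on. set_weight_attrs is re-implemented here as a stand-in for FastDeploy's setattr-based helper, and rollout_tp_degree is a hypothetical consumer on the RL side; only the attribute names rl_need_attr and rl_tp_degree are taken from the patches themselves.

# Sketch of the weight-attribute pattern, under the assumptions stated above.
from types import SimpleNamespace
from typing import Any, Dict


def set_weight_attrs(weight: Any, attrs: Dict[str, Any]) -> None:
    # Attach each entry of `attrs` to the parameter object via setattr,
    # mirroring the helper these patches call.
    for key, value in attrs.items():
        setattr(weight, key, value)


def rollout_tp_degree(weight: Any, default: int = 1) -> int:
    # Hypothetical consumer: recover the tensor-parallel degree recorded at
    # load time, so an RL trainer knows how many shards the weight was cut into.
    rl_attr = getattr(weight, "rl_need_attr", None)
    return rl_attr["rl_tp_degree"] if rl_attr is not None else default


if __name__ == "__main__":
    weight = SimpleNamespace()  # stands in for a paddle Parameter
    tensor_parallel_size = 4  # stands in for fd_config.parallel_config.tensor_parallel_size
    set_weight_attrs(weight, {"rl_need_attr": {"rl_tp_degree": tensor_parallel_size}})
    assert rollout_tp_degree(weight) == 4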