[Loader][BugFix] Fix some parameters place on CPU in PaddleOCR-VL (#5413)

SigureMo · web-flow · commit 0c66163dfdd7 · 2025-12-08T10:01:00.000+08:00
* [BugFix] Fix some parameter place on CPU in PaddleOCR-VL

* clean log

* fix codestyle
diff --git a/fastdeploy/model_executor/models/paddleocr_vl/projector.py b/fastdeploy/model_executor/models/paddleocr_vl/projector.py
@@ -20,6 +20,8 @@
 import paddle
 import paddle.nn as nn
 
+from fastdeploy.model_executor.utils import h2d_copy
+
 
 class GELUActivation(nn.Layer):
     """
@@ -97,6 +99,8 @@ def forward(self, image_features, image_grid_thw):
 
     def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
         loaded_weight = loaded_weight.transpose([1, 0])
+        if not param._is_initialized():
+            param.initialize()
         assert param.shape == loaded_weight.shape, (
             f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
         )
@@ -106,4 +110,4 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N
                 loaded_weight = loaded_weight.view(param.dtype)
             else:
                 loaded_weight = loaded_weight.cast(param.dtype)
-        param.copy_(loaded_weight, False)
+        h2d_copy(param, loaded_weight)
diff --git a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
@@ -100,6 +100,8 @@ def qkv_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str]
 
     def out_proj_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
         loaded_weight = loaded_weight.transpose([1, 0])
+        if not param._is_initialized():
+            param.initialize()
         assert param.shape == loaded_weight.shape, (
             f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
         )
@@ -109,7 +111,7 @@ def out_proj_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional
                 loaded_weight = loaded_weight.view(param.dtype)
             else:
                 loaded_weight = loaded_weight.cast(param.dtype)
-        param.copy_(loaded_weight, False)
+        h2d_copy(param, loaded_weight)
 
     def forward(
         self,
@@ -287,6 +289,8 @@ def __init__(self, config):
 
     def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
         loaded_weight = loaded_weight.transpose([1, 0])
+        if not param._is_initialized():
+            param.initialize()
         assert param.shape == loaded_weight.shape, (
             f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
         )
@@ -296,7 +300,7 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N
                 loaded_weight = loaded_weight.view(param.dtype)
             else:
                 loaded_weight = loaded_weight.cast(param.dtype)
-        param.copy_(loaded_weight, False)
+        h2d_copy(param, loaded_weight)
 
     def forward(self, hidden_states: paddle.Tensor) -> paddle.Tensor:
         hidden_states = self.fc1(hidden_states)