From 7f45ff529b396c865ad661a1e4d99d431c5d61f3 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 16 Oct 2025 13:46:16 +0800 Subject: [PATCH 01/49] Fix bugs (#4616) * Rename files and classes * Fix doc * Fix bug --- .../tutorials/ocr_pipelines/PaddleOCR-VL.md | 9 ++---- .../models/doc_vlm/modeling/__init__.py | 2 +- .../{ppocrvl => paddleocr_vl}/__init__.py | 2 +- .../{ppocrvl => paddleocr_vl}/_config.py | 6 ++-- .../_distributed/__init__.py | 0 .../_distributed/common_dist_utils.py | 0 .../{ppocrvl => paddleocr_vl}/_ernie.py | 28 +++++++++---------- .../_fusion_ops/__init__.py | 0 .../_fusion_ops/common_fusion_ops.py | 0 .../_fusion_ops/npu_fusion_ops.py | 0 .../_paddleocr_vl.py} | 12 ++++---- .../{ppocrvl => paddleocr_vl}/_projector.py | 0 .../_refined_recompute/__init__.py | 0 .../_refined_recompute/utils.py | 0 .../_sequence_parallel_utils.py | 0 .../{ppocrvl => paddleocr_vl}/_siglip.py | 4 +-- paddlex/inference/models/doc_vlm/predictor.py | 8 +++--- .../models/doc_vlm/processors/__init__.py | 2 +- .../{ppocrvl => paddleocr_vl}/__init__.py | 2 +- .../_paddleocr_vl.py} | 2 +- .../{ppocrvl => paddleocr_vl}/_siglip.py | 0 paddlex/inference/pipelines/__init__.py | 2 +- .../pipelines/paddleocr_vl/__init__.py | 2 +- .../pipelines/paddleocr_vl/pipeline.py | 18 ++++++------ .../pipelines/paddleocr_vl/result.py | 10 +++---- 25 files changed, 53 insertions(+), 56 deletions(-) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/__init__.py (90%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_config.py (98%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_distributed/__init__.py (100%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_distributed/common_dist_utils.py (100%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_ernie.py (99%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_fusion_ops/__init__.py (100%) 
rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_fusion_ops/common_fusion_ops.py (100%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_fusion_ops/npu_fusion_ops.py (100%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl/_ppocrvl.py => paddleocr_vl/_paddleocr_vl.py} (99%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_projector.py (100%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_refined_recompute/__init__.py (100%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_refined_recompute/utils.py (100%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_sequence_parallel_utils.py (100%) rename paddlex/inference/models/doc_vlm/modeling/{ppocrvl => paddleocr_vl}/_siglip.py (99%) rename paddlex/inference/models/doc_vlm/processors/{ppocrvl => paddleocr_vl}/__init__.py (93%) rename paddlex/inference/models/doc_vlm/processors/{ppocrvl/_ppocrvl.py => paddleocr_vl/_paddleocr_vl.py} (99%) rename paddlex/inference/models/doc_vlm/processors/{ppocrvl => paddleocr_vl}/_siglip.py (100%) diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md index 45a48104db..55e6f9eb46 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md @@ -878,15 +878,11 @@ for res in output: #### 3.1.1 使用 Docker 镜像 -PaddleX 针对不同推理加速框架提供了相应的 Docker 镜像,用于快速启动 VLM 推理服务: - -* **vLLM**:`ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server` -* **SGLang**:`ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-sglang-server` - -以 vLLM 为例,可使用以下命令启动服务: +PaddleX 提供了 Docker 镜像,用于快速启动 vLLM 推理服务。可使用以下命令启动服务: ```bash docker run \ + -it \ --rm \ --gpus all \ --network host \ @@ -899,6 +895,7 @@ docker run \ ```bash docker run \ + -it \ --rm \ --gpus all \ 
--network host \ diff --git a/paddlex/inference/models/doc_vlm/modeling/__init__.py b/paddlex/inference/models/doc_vlm/modeling/__init__.py index 565fd61d49..298a5b85d2 100644 --- a/paddlex/inference/models/doc_vlm/modeling/__init__.py +++ b/paddlex/inference/models/doc_vlm/modeling/__init__.py @@ -13,6 +13,6 @@ # limitations under the License. from .GOT_ocr_2_0 import PPChart2TableInference -from .ppocrvl import PPOCRVLForConditionalGeneration +from .paddleocr_vl import PaddleOCRVLForConditionalGeneration from .qwen2_5_vl import PPDocBee2Inference from .qwen2_vl import PPDocBeeInference, Qwen2VLForConditionalGeneration diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/__init__.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/__init__.py similarity index 90% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/__init__.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/__init__.py index f2b00b31b0..42126a0ce8 100644 --- a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/__init__.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/__init__.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ._ppocrvl import PPOCRVLForConditionalGeneration +from ._paddleocr_vl import PaddleOCRVLForConditionalGeneration diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_config.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py similarity index 98% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_config.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py index 141b041bd9..6b07eac0b7 100644 --- a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_config.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py @@ -30,7 +30,7 @@ class PPOCRVisionConfig(PretrainedConfig): - model_type = "ppocrvl" + model_type = "paddleocr_vl" base_config_key = "vision_config" def __init__( @@ -67,8 +67,8 @@ def __init__( self.tokens_per_second = tokens_per_second -class PPOCRVLConfig(PretrainedConfig): - model_type = "ppocrvl" +class PaddleOCRVLConfig(PretrainedConfig): + model_type = "paddleocr_vl" keys_to_ignore_at_inference = ["past_key_values"] sub_configs = {"vision_config": PPOCRVisionConfig} diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_distributed/__init__.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_distributed/__init__.py similarity index 100% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_distributed/__init__.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_distributed/__init__.py diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_distributed/common_dist_utils.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_distributed/common_dist_utils.py similarity index 100% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_distributed/common_dist_utils.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_distributed/common_dist_utils.py diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_ernie.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_ernie.py 
similarity index 99% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_ernie.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_ernie.py index 7bc3b7f6ad..7e43b3cff0 100644 --- a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_ernie.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_ernie.py @@ -43,7 +43,7 @@ from ....common.vlm.transformers.model_outputs import ( BaseModelOutputWithPastAndCrossAttentions, ) -from ._config import PPOCRVLConfig +from ._config import PaddleOCRVLConfig from ._distributed import ( AllGatherVarlenOp, ColumnParallelLinear, @@ -76,7 +76,7 @@ def calc_lm_head_logits( handling sequence parallelism and tensor parallelism configurations. Args: - config (PPOCRVLConfig): Model configuration. + config (PaddleOCRVLConfig): Model configuration. hidden_states (Tensor): Hidden states from the transformer layers weight (Tensor): Weight matrix for the language model head bias (Tensor): Bias vector for the language model head @@ -263,7 +263,7 @@ def __init__(self, config): Initialize RMSNorm layer. Args: - config (PPOCRVLConfig): Model configuration. + config (PaddleOCRVLConfig): Model configuration. """ super().__init__() self.hidden_size = config.hidden_size @@ -324,7 +324,7 @@ def __init__(self, config): Initialize LayerNorm with configuration. Args: - config (PPOCRVLConfig): Model configuration contains normalization parameters and flags. + config (PaddleOCRVLConfig): Model configuration contains normalization parameters and flags. """ super().__init__(config.hidden_size, epsilon=config.rms_norm_eps) self.config = config @@ -334,7 +334,7 @@ def __init__(self, config): class KeyeRotaryEmbedding(nn.Layer): - def __init__(self, config: PPOCRVLConfig, device=None): + def __init__(self, config: PaddleOCRVLConfig, device=None): super().__init__() self.rope_kwargs = {} if config is None: @@ -408,7 +408,7 @@ def __init__(self, config, layer_idx=0): Initialize the MLP module with configuration options. 
Args: - config (PPOCRVLConfig): Model configurations. + config (PaddleOCRVLConfig): Model configurations. layer_idx (int): Index of current layer (default: 0) """ super().__init__() @@ -507,7 +507,7 @@ def __init__(self, config, layer_idx=0): """Initialize the attention layer. Args: - config (PPOCRVLConfig): Model configuration. + config (PaddleOCRVLConfig): Model configuration. layer_idx (int, optional): Index in transformer stack. Defaults to 0. """ super().__init__() @@ -1223,7 +1223,7 @@ def __init__(self, config, return_tuple=True): """Initialize the pretraining criterion. Args: - config (PPOCRVLConfig): Model configuration. + config (PaddleOCRVLConfig): Model configuration. return_tuple (bool): Whether to return loss as tuple (loss, loss_sum). Defaults to True. """ super(ErniePretrainingCriterion, self).__init__() @@ -1516,7 +1516,7 @@ def __init__(self, config): """Initialize the language model head. Args: - config (PPOCRVLConfig): Model configuration containing: + config (PaddleOCRVLConfig): Model configuration containing: - vocab_size: Size of vocabulary - hidden_size: Dimension of hidden states - tensor_parallel_degree: Degree of tensor parallelism @@ -1632,7 +1632,7 @@ def __init__(self, config, layer_idx): """Initialize the decoder layer. Args: - config (PPOCRVLConfig): Model configuration. + config (PaddleOCRVLConfig): Model configuration. layer_idx (int): Index of this layer in the transformer stack """ super().__init__() @@ -1776,7 +1776,7 @@ def model_parallel_dropout(self): class Ernie4_5PretrainedModel(PretrainedModel): """Base class for ERNIE pretrained models.""" - config_class = PPOCRVLConfig + config_class = PaddleOCRVLConfig base_model_prefix = "ernie" @classmethod @@ -1784,7 +1784,7 @@ def _get_tensor_parallel_mappings(cls, config, is_split=True): """Generate tensor parallel mappings for model conversion. Args: - config (PPOCRVLConfig): Model configuration. + config (PaddleOCRVLConfig): Model configuration. 
is_split (bool): Whether to generate split mappings (True) or merge mappings (False). Defaults to True. @@ -2005,11 +2005,11 @@ def get_tensor_parallel_split_mappings(num_hidden_layers): class Ernie4_5Model(Ernie4_5PretrainedModel): """The core ERNIE transformer model""" - def __init__(self, config: PPOCRVLConfig): + def __init__(self, config: PaddleOCRVLConfig): """Initialize the ERNIE model architecture. Args: - config (PPOCRVLConfig): Model configuration. + config (PaddleOCRVLConfig): Model configuration. """ super().__init__(config) self.padding_idx = config.pad_token_id diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_fusion_ops/__init__.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_fusion_ops/__init__.py similarity index 100% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_fusion_ops/__init__.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_fusion_ops/__init__.py diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_fusion_ops/common_fusion_ops.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_fusion_ops/common_fusion_ops.py similarity index 100% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_fusion_ops/common_fusion_ops.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_fusion_ops/common_fusion_ops.py diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_fusion_ops/npu_fusion_ops.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_fusion_ops/npu_fusion_ops.py similarity index 100% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_fusion_ops/npu_fusion_ops.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_fusion_ops/npu_fusion_ops.py diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_ppocrvl.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py similarity index 99% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_ppocrvl.py rename to 
paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py index 6271e22540..b8385c8f75 100644 --- a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_ppocrvl.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py @@ -46,14 +46,14 @@ CausalLMOutputWithCrossAttentions, ModelOutput, ) -from ._config import PPOCRVLConfig +from ._config import PaddleOCRVLConfig from ._ernie import Ernie4_5Model, Ernie4_5PretrainedModel from ._projector import Projector from ._siglip import SiglipVisionModel @dataclass -class PPOCRVLCausalLMOutputWithPast(ModelOutput): +class PaddleOCRVLCausalLMOutputWithPast(ModelOutput): loss: Optional[paddle.Tensor] = None logits: paddle.Tensor = None past_key_values: Optional[List[paddle.Tensor]] = None @@ -62,9 +62,9 @@ class PPOCRVLCausalLMOutputWithPast(ModelOutput): rope_deltas: Optional[paddle.Tensor] = None -class PPOCRVLForConditionalGeneration(Ernie4_5PretrainedModel, GenerationMixin): +class PaddleOCRVLForConditionalGeneration(Ernie4_5PretrainedModel, GenerationMixin): _tied_weights_keys = ["lm_head.weight"] - config_class = PPOCRVLConfig + config_class = PaddleOCRVLConfig _no_split_modules = ["Ernie4_5DecoderLayer", "SiglipEncoderLayer"] base_model_prefix = "" @@ -644,7 +644,7 @@ def forward( rope_deltas: Optional[paddle.Tensor] = None, second_per_grid_ts: Optional[paddle.Tensor] = None, **kwargs, - ) -> Union[Tuple, PPOCRVLCausalLMOutputWithPast]: + ) -> Union[Tuple, PaddleOCRVLCausalLMOutputWithPast]: output_attentions = ( output_attentions if output_attentions is not None @@ -797,7 +797,7 @@ def forward( output = (logits,) + outputs[1:] return (loss,) + output if loss is not None else output - return PPOCRVLCausalLMOutputWithPast( + return PaddleOCRVLCausalLMOutputWithPast( loss=loss, logits=logits, past_key_values=outputs.past_key_values, diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_projector.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_projector.py similarity 
index 100% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_projector.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_projector.py diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_refined_recompute/__init__.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_refined_recompute/__init__.py similarity index 100% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_refined_recompute/__init__.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_refined_recompute/__init__.py diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_refined_recompute/utils.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_refined_recompute/utils.py similarity index 100% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_refined_recompute/utils.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_refined_recompute/utils.py diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_sequence_parallel_utils.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_sequence_parallel_utils.py similarity index 100% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_sequence_parallel_utils.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_sequence_parallel_utils.py diff --git a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_siglip.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py similarity index 99% rename from paddlex/inference/models/doc_vlm/modeling/ppocrvl/_siglip.py rename to paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py index 548ee786d0..f24b9b9290 100644 --- a/paddlex/inference/models/doc_vlm/modeling/ppocrvl/_siglip.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py @@ -48,7 +48,7 @@ BaseModelOutput, BaseModelOutputWithPooling, ) -from ._config import PPOCRVisionConfig, PPOCRVLConfig +from ._config import PaddleOCRVLConfig, PPOCRVisionConfig def 
rotate_half(x): @@ -801,7 +801,7 @@ def forward( class SiglipPreTrainedModel(PretrainedModel): - config_class = PPOCRVLConfig + config_class = PaddleOCRVLConfig base_model_prefix = "siglip" supports_gradient_checkpointing = True diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py index f41e51daff..9d252b454d 100644 --- a/paddlex/inference/models/doc_vlm/predictor.py +++ b/paddlex/inference/models/doc_vlm/predictor.py @@ -103,10 +103,10 @@ def _build(self, **kwargs): processor: The correspounding processor for the model. """ from .modeling import ( + PaddleOCRVLForConditionalGeneration, PPChart2TableInference, PPDocBee2Inference, PPDocBeeInference, - PPOCRVLForConditionalGeneration, ) # build processor @@ -149,7 +149,7 @@ def _build(self, **kwargs): "The PaddelOCR-VL series does not support `use_hpip=True` for now." ) with TemporaryDeviceChanger(self.device): - model = PPOCRVLForConditionalGeneration.from_pretrained( + model = PaddleOCRVLForConditionalGeneration.from_pretrained( self.model_dir, dtype=self.dtype, convert_from_hf=True, @@ -260,10 +260,10 @@ def build_processor(self, **kwargs): from ..common.tokenizer.tokenizer_utils import ChatTemplate from .processors import ( GOTImageProcessor, + PaddleOCRVLProcessor, PPChart2TableProcessor, PPDocBee2Processor, PPDocBeeProcessor, - PPOCRVLProcessor, Qwen2_5_VLImageProcessor, Qwen2VLImageProcessor, SiglipImageProcessor, @@ -298,7 +298,7 @@ def build_processor(self, **kwargs): tokenizer.chat_template = ChatTemplate._compile_jinja_template( chat_template_file.read_text(encoding="utf-8") ) - return PPOCRVLProcessor( + return PaddleOCRVLProcessor( image_processor=image_processor, tokenizer=tokenizer, ) diff --git a/paddlex/inference/models/doc_vlm/processors/__init__.py b/paddlex/inference/models/doc_vlm/processors/__init__.py index 31d5c6daca..80bcf22f58 100644 --- a/paddlex/inference/models/doc_vlm/processors/__init__.py +++ 
b/paddlex/inference/models/doc_vlm/processors/__init__.py @@ -13,6 +13,6 @@ # limitations under the License. from .GOT_ocr_2_0 import GOTImageProcessor, PPChart2TableProcessor -from .ppocrvl import PPOCRVLProcessor, SiglipImageProcessor +from .paddleocr_vl import PaddleOCRVLProcessor, SiglipImageProcessor from .qwen2_5_vl import PPDocBee2Processor, Qwen2_5_VLImageProcessor from .qwen2_vl import PPDocBeeProcessor, Qwen2VLImageProcessor diff --git a/paddlex/inference/models/doc_vlm/processors/ppocrvl/__init__.py b/paddlex/inference/models/doc_vlm/processors/paddleocr_vl/__init__.py similarity index 93% rename from paddlex/inference/models/doc_vlm/processors/ppocrvl/__init__.py rename to paddlex/inference/models/doc_vlm/processors/paddleocr_vl/__init__.py index ee42136c4a..1e5bdfafd5 100644 --- a/paddlex/inference/models/doc_vlm/processors/ppocrvl/__init__.py +++ b/paddlex/inference/models/doc_vlm/processors/paddleocr_vl/__init__.py @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ._ppocrvl import PPOCRVLProcessor +from ._paddleocr_vl import PaddleOCRVLProcessor from ._siglip import SiglipImageProcessor diff --git a/paddlex/inference/models/doc_vlm/processors/ppocrvl/_ppocrvl.py b/paddlex/inference/models/doc_vlm/processors/paddleocr_vl/_paddleocr_vl.py similarity index 99% rename from paddlex/inference/models/doc_vlm/processors/ppocrvl/_ppocrvl.py rename to paddlex/inference/models/doc_vlm/processors/paddleocr_vl/_paddleocr_vl.py index 0705382be0..9b4dd9857b 100644 --- a/paddlex/inference/models/doc_vlm/processors/ppocrvl/_ppocrvl.py +++ b/paddlex/inference/models/doc_vlm/processors/paddleocr_vl/_paddleocr_vl.py @@ -42,7 +42,7 @@ from ..common import BatchFeature, fetch_image -class PPOCRVLProcessor(object): +class PaddleOCRVLProcessor(object): _DEFAULT_TEXT_KWARGS = { "padding": False, "return_tensors": "pd", diff --git a/paddlex/inference/models/doc_vlm/processors/ppocrvl/_siglip.py b/paddlex/inference/models/doc_vlm/processors/paddleocr_vl/_siglip.py similarity index 100% rename from paddlex/inference/models/doc_vlm/processors/ppocrvl/_siglip.py rename to paddlex/inference/models/doc_vlm/processors/paddleocr_vl/_siglip.py diff --git a/paddlex/inference/pipelines/__init__.py b/paddlex/inference/pipelines/__init__.py index c283b7a457..1ae8758244 100644 --- a/paddlex/inference/pipelines/__init__.py +++ b/paddlex/inference/pipelines/__init__.py @@ -41,7 +41,7 @@ from .ocr import OCRPipeline from .open_vocabulary_detection import OpenVocabularyDetectionPipeline from .open_vocabulary_segmentation import OpenVocabularySegmentationPipeline -from .paddleocr_vl import PPOCRVLPipeline +from .paddleocr_vl import PaddleOCRVLPipeline from .pp_chatocr import PP_ChatOCRv3_Pipeline, PP_ChatOCRv4_Pipeline from .pp_doctranslation import PP_DocTranslation_Pipeline from .pp_shitu_v2 import ShiTuV2Pipeline diff --git a/paddlex/inference/pipelines/paddleocr_vl/__init__.py b/paddlex/inference/pipelines/paddleocr_vl/__init__.py index 7eae598a68..b50ca9d04f 
100644 --- a/paddlex/inference/pipelines/paddleocr_vl/__init__.py +++ b/paddlex/inference/pipelines/paddleocr_vl/__init__.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .pipeline import PPOCRVLPipeline +from .pipeline import PaddleOCRVLPipeline diff --git a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py index 7d70899cec..0155424ccf 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py +++ b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py @@ -32,7 +32,7 @@ from ..base import BasePipeline from ..components import CropByBoxes from ..layout_parsing.utils import gather_imgs -from .result import PPOCRVLBlock, PPOCRVLResult +from .result import PaddleOCRVLBlock, PaddleOCRVLResult from .uilts import ( convert_otsl_to_html, filter_overlap_boxes, @@ -46,8 +46,8 @@ @benchmark.time_methods -class _PPOCRVLPipeline(BasePipeline): - """_PPOCRVLPipeline Pipeline""" +class _PaddleOCRVLPipeline(BasePipeline): + """_PaddleOCRVLPipeline Pipeline""" def __init__( self, @@ -314,7 +314,7 @@ def get_layout_parsing_results( block_content = result_str - block_info = PPOCRVLBlock( + block_info = PaddleOCRVLBlock( label=block_label, bbox=block_bbox, content=block_content, @@ -359,7 +359,7 @@ def predict( min_pixels: Optional[int] = None, max_pixels: Optional[int] = None, **kwargs, - ) -> PPOCRVLResult: + ) -> PaddleOCRVLResult: """ Predicts the layout parsing result for the given input. @@ -379,7 +379,7 @@ def predict( **kwargs (Any): Additional settings to extend functionality. Returns: - PPOCRVLResult: The predicted layout parsing result. + PaddleOCRVLResult: The predicted layout parsing result. 
""" model_settings = self.get_model_settings( use_doc_orientation_classify, @@ -532,7 +532,7 @@ def _process_vlm(results_cv): "imgs_in_doc": imgs_in_doc_for_img, "model_settings": model_settings, } - yield PPOCRVLResult(single_img_res) + yield PaddleOCRVLResult(single_img_res) if use_queues: max_num_batches_in_process = 64 @@ -677,12 +677,12 @@ def _worker_vlm(): @pipeline_requires_extra("ocr") -class PPOCRVLPipeline(AutoParallelImageSimpleInferencePipeline): +class PaddleOCRVLPipeline(AutoParallelImageSimpleInferencePipeline): entities = "PaddleOCR-VL" @property def _pipeline_cls(self): - return _PPOCRVLPipeline + return _PaddleOCRVLPipeline def _get_batch_size(self, config): return config.get("batch_size", 1) diff --git a/paddlex/inference/pipelines/paddleocr_vl/result.py b/paddlex/inference/pipelines/paddleocr_vl/result.py index 0d6e163c37..cef785acce 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/result.py +++ b/paddlex/inference/pipelines/paddleocr_vl/result.py @@ -54,12 +54,12 @@ ] -class PPOCRVLBlock(object): - """PPOCRVL Block Class""" +class PaddleOCRVLBlock(object): + """PaddleOCRVL Block Class""" def __init__(self, label, bbox, content="") -> None: """ - Initialize a PPOCRVLBlock object. + Initialize a PaddleOCRVLBlock object. Args: label (str): Label assigned to the block. @@ -211,9 +211,9 @@ def build_handle_funcs_dict( } -class PPOCRVLResult(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin): +class PaddleOCRVLResult(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin): """ - PPOCRVLResult class for holding and formatting OCR/VL parsing results. + PaddleOCRVLResult class for holding and formatting OCR/VL parsing results. 
""" def __init__(self, data) -> None: From 680a1483d59e42ad96aa8372a0990db26d588a3f Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 16 Oct 2025 15:09:04 +0800 Subject: [PATCH 02/49] Do not use direct links (#4618) --- docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md | 6 +++++- setup.py | 5 +---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md index 55e6f9eb46..51ff4b3cb4 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md @@ -289,7 +289,11 @@ comments: true ## 2. 快速开始 PaddleX 所提供的模型产线均可以快速体验效果,你可以在本地使用命令行或 Python 体验通用通用版面解析v3产线的效果。 -在本地使用通用版面解析v3产线前,请确保您已经按照[PaddleX本地安装教程](../../../installation/installation.md)完成了PaddleX的wheel包安装。如果您希望选择性安装依赖,请参考安装教程中的相关说明。该产线对应的依赖分组为 `ocr`。 +在本地使用通用版面解析v3产线前,请确保您已经按照[PaddleX本地安装教程](../../../installation/installation.md)完成了PaddleX的wheel包安装。如果您希望选择性安装依赖,请参考安装教程中的相关说明。该产线对应的依赖分组为 `ocr`。此外,为了使用飞桨框架读取 safetensors 格式模型,请执行如下命令安装 safetensors: + +```bash +pytyon -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl +``` > 默认配置暂不支持 Compute Capability 低于 8.0 的 GPU(如 V100、RTX 3060 等)。请参阅下一节,了解如何在此类 GPU 上使用推理加速框架。 diff --git a/setup.py b/setup.py index 5a59c714a8..8c943904ba 100644 --- a/setup.py +++ b/setup.py @@ -64,10 +64,7 @@ "regex": "", "requests": "", "ruamel.yaml": "", - "safetensors": [ - "@ https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl ; sys_platform == 'linux'", - "; sys_platform != 'linux'", - ], + "safetensors": "", "scikit-image": "", "scikit-learn": "", "sentencepiece": "", From e1664fe3345896f60799e364425ac8eb26b0b935 Mon Sep 17 00:00:00 2001 From: zhang-prog <69562787+zhang-prog@users.noreply.github.com> Date: Thu, 16 Oct 2025 15:08:28 +0800 Subject: [PATCH 
03/49] fix PaddleOCR-VL name - local (#4617) * fix PaddleOCR-VL name - local * fix --- .../models/doc_vlm/modeling/paddleocr_vl/_config.py | 4 ++-- .../models/doc_vlm/modeling/paddleocr_vl/_siglip.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py index 6b07eac0b7..6c563a0cda 100644 --- a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py @@ -29,7 +29,7 @@ from ....common.vlm.transformers import PretrainedConfig -class PPOCRVisionConfig(PretrainedConfig): +class PaddleOCRVisionConfig(PretrainedConfig): model_type = "paddleocr_vl" base_config_key = "vision_config" @@ -70,7 +70,7 @@ def __init__( class PaddleOCRVLConfig(PretrainedConfig): model_type = "paddleocr_vl" keys_to_ignore_at_inference = ["past_key_values"] - sub_configs = {"vision_config": PPOCRVisionConfig} + sub_configs = {"vision_config": PaddleOCRVisionConfig} base_model_tp_plan = { "layers.*.self_attn.q_proj": "colwise", diff --git a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py index f24b9b9290..a4a3c4a0c1 100644 --- a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py @@ -48,7 +48,7 @@ BaseModelOutput, BaseModelOutputWithPooling, ) -from ._config import PaddleOCRVLConfig, PPOCRVisionConfig +from ._config import PaddleOCRVisionConfig, PaddleOCRVLConfig def rotate_half(x): @@ -617,7 +617,7 @@ def forward( class SiglipMultiheadAttentionPoolingHead(nn.Layer): """Multihead Attention Pooling.""" - def __init__(self, config: PPOCRVisionConfig): + def __init__(self, config: PaddleOCRVisionConfig): super().__init__() self.probe = self.create_parameter( @@ -646,7 +646,7 @@ def forward(self, 
hidden_state, key_padding_mask=None): class SiglipVisionTransformer(nn.Layer): - def __init__(self, config: PPOCRVisionConfig): + def __init__(self, config: PaddleOCRVisionConfig): super().__init__() self.config = config embed_dim = config.hidden_size @@ -816,10 +816,10 @@ class SiglipPreTrainedModel(PretrainedModel): class SiglipVisionModel(SiglipPreTrainedModel): - config_class = PPOCRVisionConfig + config_class = PaddleOCRVisionConfig main_input_name = "pixel_values" - def __init__(self, config: PPOCRVisionConfig): + def __init__(self, config: PaddleOCRVisionConfig): super().__init__(config) self.vision_model = SiglipVisionTransformer(config) From 1c2f2865f141f558c76e247f22012c86c7e94fb0 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 16 Oct 2025 15:14:31 +0800 Subject: [PATCH 04/49] Fix mkdocs.yml (#4619) --- mkdocs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mkdocs.yml b/mkdocs.yml index 338e0f4225..907cb87bad 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -107,6 +107,7 @@ plugins: 公式识别产线: Formula Recognition 印章文本识别产线: Seal Recognition 文档图像预处理产线: Document Image Preprocessing + PaddleOCR-VL产线: PaddleOCR-VL 计算机视觉: Computer Vision 通用图像分类: General Image Classification 通用目标检测: General Object Detection @@ -339,6 +340,7 @@ nav: - 公式识别产线: pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md - 印章文本识别产线: pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md - 文档图像预处理产线: pipeline_usage/tutorials/ocr_pipelines/doc_preprocessor.md + - PaddleOCR-VL产线: pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md - 计算机视觉: - 通用图像分类: pipeline_usage/tutorials/cv_pipelines/image_classification.md - 通用目标检测: pipeline_usage/tutorials/cv_pipelines/object_detection.md From 682c15b3008c0bc1e7c61b867dfe4e13f244eb86 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 16 Oct 2025 15:48:52 +0800 Subject: [PATCH 05/49] Fix typos (#4621) --- docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md index 51ff4b3cb4..939bcf50ad 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md @@ -292,7 +292,7 @@ PaddleX 所提供的模型产线均可以快速体验效果,你可以在本地 在本地使用通用版面解析v3产线前,请确保您已经按照[PaddleX本地安装教程](../../../installation/installation.md)完成了PaddleX的wheel包安装。如果您希望选择性安装依赖,请参考安装教程中的相关说明。该产线对应的依赖分组为 `ocr`。此外,为了使用飞桨框架读取 safetensors 格式模型,请执行如下命令安装 safetensors: ```bash -pytyon -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl +python -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl ``` > 默认配置暂不支持 Compute Capability 低于 8.0 的 GPU(如 V100、RTX 3060 等)。请参阅下一节,了解如何在此类 GPU 上使用推理加速框架。 @@ -961,8 +961,8 @@ paddlex --get_pipeline_config PaddleOCR-VL VLRecognition: ... genai_config: - backend: vllm - server_url: http://127.0.0.1:8118 + backend: vllm-server + server_url: http://127.0.0.1:8118/v1 ``` 之后,可以使用修改好的配置文件进行产线调用。例如通过 CLI 调用: From a2be29d8deb5856f1fa951ec597f341e00eafb9d Mon Sep 17 00:00:00 2001 From: changdazhou <142379845+changdazhou@users.noreply.github.com> Date: Thu, 16 Oct 2025 15:51:34 +0800 Subject: [PATCH 06/49] support concatenate_markdown_pages (#4622) * add PP-DocLayoutV2 in official models * support concatenate_markdown_pages --- .../pipelines/paddleocr_vl/pipeline.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py index 0155424ccf..cff337ae79 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py +++ b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py @@ -675,6 +675,23 @@ def _worker_vlm(): if thread_vlm.is_alive(): logging.warning("VLM worker did not terminate in time") + def 
concatenate_markdown_pages(self, markdown_list: list) -> tuple: + """ + Concatenate Markdown content from multiple pages into a single document. + + Args: + markdown_list (list): A list containing Markdown data for each page. + + Returns: + tuple: A tuple containing the processed Markdown text. + """ + markdown_texts = "" + + for res in markdown_list: + markdown_texts += "\n\n" + res["markdown_texts"] + + return markdown_texts + @pipeline_requires_extra("ocr") class PaddleOCRVLPipeline(AutoParallelImageSimpleInferencePipeline): From 0e27be0ea79bb999db2906a2ff45b68be65fd868 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Thu, 16 Oct 2025 07:57:06 +0000 Subject: [PATCH 07/49] Bump version to 3.3.1 --- paddlex/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/.version b/paddlex/.version index 15a2799817..bea438e9ad 100644 --- a/paddlex/.version +++ b/paddlex/.version @@ -1 +1 @@ -3.3.0 +3.3.1 From 518d72c894d33ea5d6beb9be710c7c45fd47c027 Mon Sep 17 00:00:00 2001 From: gaotingquan Date: Thu, 16 Oct 2025 09:30:26 +0000 Subject: [PATCH 08/49] PaddleOCR-VL, PP-DocLayoutV2 has been upload to models hosting platform --- paddlex/inference/utils/official_models.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paddlex/inference/utils/official_models.py b/paddlex/inference/utils/official_models.py index ffc352b57f..467f131434 100644 --- a/paddlex/inference/utils/official_models.py +++ b/paddlex/inference/utils/official_models.py @@ -45,7 +45,7 @@ "ResNet152", "ResNet152_vd", "ResNet200_vd", - "PaddleOCR-VL-0.9B", + "PaddleOCR-VL", "PP-LCNet_x0_25", "PP-LCNet_x0_25_textline_ori", "PP-LCNet_x0_35", @@ -345,6 +345,7 @@ "en_PP-OCRv5_mobile_rec", "th_PP-OCRv5_mobile_rec", "el_PP-OCRv5_mobile_rec", + "PaddleOCR-VL", "PicoDet_layout_1x", "PicoDet_layout_1x_table", "PicoDet-L_layout_17cls", @@ -356,6 +357,7 @@ "PP-DocBee-2B", "PP-DocBee-7B", "PP-DocBlockLayout", + "PP-DocLayoutV2", "PP-DocLayout-L", "PP-DocLayout-M", 
"PP-DocLayout_plus-L", @@ -429,7 +431,7 @@ def get_model(self, model_name): self._download(model_name, model_dir) return ( - model_dir / "PaddleOCR-VL-0.9B" + model_dir / "PaddleOCR-VL" if model_name == "PaddleOCR-VL-0.9B" else model_dir ) From b66127319e544ff8e485ae29efbc2ce769517151 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Thu, 16 Oct 2025 11:09:02 +0000 Subject: [PATCH 09/49] Bump version to 3.2.2 --- paddlex/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/.version b/paddlex/.version index bea438e9ad..4772543317 100644 --- a/paddlex/.version +++ b/paddlex/.version @@ -1 +1 @@ -3.3.1 +3.3.2 From f887a3092803039c53d0881ff0ee61369e6f83c7 Mon Sep 17 00:00:00 2001 From: zhang-prog <69562787+zhang-prog@users.noreply.github.com> Date: Thu, 16 Oct 2025 20:36:31 +0800 Subject: [PATCH 10/49] genai plugin: add wheel package (#4626) --- paddlex/paddlex_cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/paddlex/paddlex_cli.py b/paddlex/paddlex_cli.py index 626c66e51d..5fc9c0fb99 100644 --- a/paddlex/paddlex_cli.py +++ b/paddlex/paddlex_cli.py @@ -369,6 +369,7 @@ def _install_genai_deps(plugin_types): for plugin_type in plugin_types: if "vllm" in plugin_type or "sglang" in plugin_type: try: + install_packages(["wheel"], constraints="required") if "vllm" in plugin_type: install_packages( ["flash-attn == 2.8.3"], constraints="required" From 599daa2d0eb886bb895476150a29600b1c8b96f0 Mon Sep 17 00:00:00 2001 From: gaotingquan Date: Thu, 16 Oct 2025 12:08:40 +0000 Subject: [PATCH 11/49] bugfix: map PaddleOCR-VL-0.9B to PaddleOCR-VL --- paddlex/inference/utils/official_models.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/paddlex/inference/utils/official_models.py b/paddlex/inference/utils/official_models.py index 467f131434..e057f11a07 100644 --- a/paddlex/inference/utils/official_models.py +++ b/paddlex/inference/utils/official_models.py @@ -45,7 +45,7 @@ "ResNet152", "ResNet152_vd", 
"ResNet200_vd", - "PaddleOCR-VL", + "PaddleOCR-VL-0.9B", "PP-LCNet_x0_25", "PP-LCNet_x0_25_textline_ori", "PP-LCNet_x0_35", @@ -345,7 +345,7 @@ "en_PP-OCRv5_mobile_rec", "th_PP-OCRv5_mobile_rec", "el_PP-OCRv5_mobile_rec", - "PaddleOCR-VL", + "PaddleOCR-VL-0.9B", "PicoDet_layout_1x", "PicoDet_layout_1x_table", "PicoDet-L_layout_17cls", @@ -419,6 +419,9 @@ def get_model(self, model_name): assert ( model_name in self.model_list ), f"The model {model_name} is not supported on hosting {self.__class__.__name__}!" + if model_name == "PaddleOCR-VL-0.9B": + model_name = "PaddleOCR-VL" + model_dir = self._save_dir / f"{model_name}" if os.path.exists(model_dir): logging.info( @@ -431,8 +434,8 @@ def get_model(self, model_name): self._download(model_name, model_dir) return ( - model_dir / "PaddleOCR-VL" - if model_name == "PaddleOCR-VL-0.9B" + model_dir / "PaddleOCR-VL-0.9B" + if model_name == "PaddleOCR-VL" else model_dir ) From d442a10068f4af0fda5da168f1db828ab0f2d178 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Thu, 16 Oct 2025 13:05:10 +0000 Subject: [PATCH 12/49] Bump version to 3.3.3 --- paddlex/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/.version b/paddlex/.version index 4772543317..619b537668 100644 --- a/paddlex/.version +++ b/paddlex/.version @@ -1 +1 @@ -3.3.2 +3.3.3 From 5c8b02f00d3f42446095a3cf66c0147bca25c6b3 Mon Sep 17 00:00:00 2001 From: zhang-prog <69562787+zhang-prog@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:58:39 +0800 Subject: [PATCH 13/49] [cherry-pick] use FlashAttention 2.8.2 (#4631) --- .../tutorials/ocr_pipelines/PaddleOCR-VL.md | 21 +++++++++++++++++++ paddlex/paddlex_cli.py | 9 +------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md index 939bcf50ad..5f73b5d5e0 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md +++ 
b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md @@ -287,6 +287,7 @@ comments: true * 由于我们没有收集NPU和XPU的设备内存数据,因此表中相应位置的数据标记为N/A。 ## 2. 快速开始 + PaddleX 所提供的模型产线均可以快速体验效果,你可以在本地使用命令行或 Python 体验通用通用版面解析v3产线的效果。 在本地使用通用版面解析v3产线前,请确保您已经按照[PaddleX本地安装教程](../../../installation/installation.md)完成了PaddleX的wheel包安装。如果您希望选择性安装依赖,请参考安装教程中的相关说明。该产线对应的依赖分组为 `ocr`。此外,为了使用飞桨框架读取 safetensors 格式模型,请执行如下命令安装 safetensors: @@ -907,6 +908,20 @@ docker run \ paddlex_genai_server --model_name PaddleOCR-VL-0.9B --host 0.0.0.0 --port 8118 --backend vllm ``` +若您使用的是 NVIDIA 50 系显卡 (Compute Capacity >= 12),需要在启动服务前安装指定版本的 FlashAttention: + +``` +docker run \ + -it \ + --rm \ + --gpus all \ + --network host \ + ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server \ + /bin/bash +python -m pip install flash-attn==2.8.3 +paddlex_genai_server --model_name PaddleOCR-VL-0.9B --backend vllm --port 8118 +``` + #### 3.1.2 通过 PaddleX CLI 和启动 由于推理加速框架可能与飞桨框架存在依赖冲突,建议在虚拟环境中安装。示例如下: @@ -924,6 +939,12 @@ paddlex --install genai-vllm-server # paddlex --install genai-sglang-server ``` +若您使用的是 NVIDIA 50 系显卡 (Compute Capacity >= 12),需要在启动服务前安装指定版本的 flash-attn: + +``` +python -m pip install flash-attn==2.8.3 +``` + 安装完成后,可通过 `paddlex_genai_server` 命令启动服务: ```bash diff --git a/paddlex/paddlex_cli.py b/paddlex/paddlex_cli.py index 5fc9c0fb99..a5c27a1e4a 100644 --- a/paddlex/paddlex_cli.py +++ b/paddlex/paddlex_cli.py @@ -370,14 +370,7 @@ def _install_genai_deps(plugin_types): if "vllm" in plugin_type or "sglang" in plugin_type: try: install_packages(["wheel"], constraints="required") - if "vllm" in plugin_type: - install_packages( - ["flash-attn == 2.8.3"], constraints="required" - ) - elif "sglang" in plugin_type: - install_packages( - ["flash-attn == 2.8.2"], constraints="required" - ) + install_packages(["flash-attn == 2.8.2"], constraints="required") except Exception: logging.error("Installation failed", exc_info=True) sys.exit(1) From d82d0918fae0a00bcc2f5ca0ff86142e4c18108c 
Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Fri, 17 Oct 2025 18:05:19 +0800 Subject: [PATCH 14/49] Fix HPS bugs (#4633) --- deploy/hps/sdk/scripts/assemble.sh | 2 +- deploy/hps/server_env/Dockerfile | 7 +++---- deploy/hps/server_env/requirements/cpu.txt | 8 +++++++- deploy/hps/server_env/requirements/cpu_dev.txt | 2 +- deploy/hps/server_env/requirements/cpu_hpi.txt | 2 +- deploy/hps/server_env/requirements/gpu.txt | 8 +++++++- deploy/hps/server_env/requirements/gpu_dev.txt | 2 +- deploy/hps/server_env/requirements/gpu_hpi.txt | 2 +- 8 files changed, 22 insertions(+), 11 deletions(-) diff --git a/deploy/hps/sdk/scripts/assemble.sh b/deploy/hps/sdk/scripts/assemble.sh index 405bb0e24e..d06fa49daa 100755 --- a/deploy/hps/sdk/scripts/assemble.sh +++ b/deploy/hps/sdk/scripts/assemble.sh @@ -8,5 +8,5 @@ docker run \ -v "$(pwd)":/workspace \ -w /workspace \ --rm \ - python:3.10@sha256:6ff000548a4fa34c1be02624836e75e212d4ead8227b4d4381c3ae998933a922 \ + ccr-2vdh3abv-pub.cnc.bj.baidubce.com/linmanhui/python:3.10 \ /bin/bash scripts/_assemble.sh "$@" diff --git a/deploy/hps/server_env/Dockerfile b/deploy/hps/server_env/Dockerfile index e7db00228d..1038a12d7f 100644 --- a/deploy/hps/server_env/Dockerfile +++ b/deploy/hps/server_env/Dockerfile @@ -33,9 +33,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN mkdir /paddlex -RUN apt-get update \ - && apt-get install -y --no-install-recommends software-properties-common \ - && add-apt-repository -y ppa:deadsnakes/ppa \ +RUN echo 'deb http://archive.ubuntu.com/ubuntu jammy main universe' > /etc/apt/sources.list.d/jammy-temp.list \ && apt-get update \ && apt-get install -y --no-install-recommends python3.10 python3.10-venv \ && python3.10 -m venv /paddlex/py310 \ @@ -101,7 +99,8 @@ RUN --mount=type=bind,source=deploy/hps/server_env/requirements/${DEVICE_TYPE}.t python -m pip install --requirement /tmp/requirements.txt --requirement /tmp/hpi_requirements.txt \ && if [ "${ENV_TYPE}" = 'dev' ]; then \ python -m pip install --requirement 
/tmp/dev_requirements.txt; \ - fi + fi \ + && python -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl RUN --mount=type=bind,source=.,target=/tmp/PaddleX,rw \ python -m pip install --no-deps /tmp/PaddleX diff --git a/deploy/hps/server_env/requirements/cpu.txt b/deploy/hps/server_env/requirements/cpu.txt index 246f8ba7f6..bbbeaab8e4 100644 --- a/deploy/hps/server_env/requirements/cpu.txt +++ b/deploy/hps/server_env/requirements/cpu.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --extra=base --extra=serving --no-emit-index-url --no-emit-trusted-host --output-file=requirements/cpu.txt --strip-extras ../../../setup.py paddlex-hps-server/pyproject.toml requirements/app.in requirements/cpu.in +# pip-compile --allow-unsafe --cert=None --client-cert=None --extra=base --extra=serving --index-url=None --no-emit-index-url --no-emit-trusted-host --output-file=requirements/cpu.txt --pip-args=None --strip-extras ../../../setup.py paddlex-hps-server/pyproject.toml requirements/app.in requirements/cpu.in # aiohappyeyeballs==2.4.6 # via aiohttp @@ -289,6 +289,8 @@ pyparsing==3.2.1 # via matplotlib pypdfium2==4.30.1 # via paddlex (../../../setup.py) +python-bidi==0.6.6 + # via paddlex (../../../setup.py) python-dateutil==2.9.0.post0 # via # matplotlib @@ -324,6 +326,8 @@ ruamel-yaml==0.18.10 # via paddlex (../../../setup.py) ruamel-yaml-clib==0.2.12 # via ruamel-yaml +safetensors==0.6.2 + # via paddlex (../../../setup.py) scikit-image==0.24.0 # via paddlex (../../../setup.py) scikit-learn==1.6.1 @@ -332,6 +336,8 @@ scipy==1.15.2 # via # scikit-image # scikit-learn +sentencepiece==0.2.1 + # via paddlex (../../../setup.py) shapely==2.0.7 # via paddlex (../../../setup.py) six==1.17.0 diff --git a/deploy/hps/server_env/requirements/cpu_dev.txt b/deploy/hps/server_env/requirements/cpu_dev.txt index ad1a87d716..7c6e251f59 
100644 --- a/deploy/hps/server_env/requirements/cpu_dev.txt +++ b/deploy/hps/server_env/requirements/cpu_dev.txt @@ -2,5 +2,5 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --constraint=requirements/cpu.txt --no-emit-index-url --no-emit-trusted-host --output-file=requirements/cpu_dev.txt --strip-extras requirements/cpu_dev.in +# pip-compile --allow-unsafe --cert=None --client-cert=None --constraint=requirements/cpu.txt --index-url=None --no-emit-index-url --no-emit-trusted-host --output-file=requirements/cpu_dev.txt --pip-args=None --strip-extras requirements/cpu_dev.in # diff --git a/deploy/hps/server_env/requirements/cpu_hpi.txt b/deploy/hps/server_env/requirements/cpu_hpi.txt index 839b28b45f..61dd0c4bc3 100644 --- a/deploy/hps/server_env/requirements/cpu_hpi.txt +++ b/deploy/hps/server_env/requirements/cpu_hpi.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --constraint=requirements/cpu.txt --no-emit-index-url --no-emit-trusted-host --output-file=requirements/cpu_hpi.txt --strip-extras requirements/cpu_hpi.in +# pip-compile --allow-unsafe --cert=None --client-cert=None --constraint=requirements/cpu.txt --index-url=None --no-emit-index-url --no-emit-trusted-host --output-file=requirements/cpu_hpi.txt --pip-args=None --strip-extras requirements/cpu_hpi.in # certifi==2025.1.31 # via diff --git a/deploy/hps/server_env/requirements/gpu.txt b/deploy/hps/server_env/requirements/gpu.txt index 4deaf930bb..3484d72f48 100644 --- a/deploy/hps/server_env/requirements/gpu.txt +++ b/deploy/hps/server_env/requirements/gpu.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --extra=base --extra=serving --no-emit-index-url --no-emit-trusted-host --output-file=requirements/gpu.txt --strip-extras ../../../setup.py 
paddlex-hps-server/pyproject.toml requirements/app.in requirements/gpu.in +# pip-compile --allow-unsafe --cert=None --client-cert=None --extra=base --extra=serving --index-url=None --no-emit-index-url --no-emit-trusted-host --output-file=requirements/gpu.txt --pip-args=None --strip-extras ../../../setup.py paddlex-hps-server/pyproject.toml requirements/app.in requirements/gpu.in # aiohappyeyeballs==2.4.6 # via aiohttp @@ -289,6 +289,8 @@ pyparsing==3.2.1 # via matplotlib pypdfium2==4.30.1 # via paddlex (../../../setup.py) +python-bidi==0.6.6 + # via paddlex (../../../setup.py) python-dateutil==2.9.0.post0 # via # matplotlib @@ -324,6 +326,8 @@ ruamel-yaml==0.18.10 # via paddlex (../../../setup.py) ruamel-yaml-clib==0.2.12 # via ruamel-yaml +safetensors==0.6.2 + # via paddlex (../../../setup.py) scikit-image==0.24.0 # via paddlex (../../../setup.py) scikit-learn==1.6.1 @@ -332,6 +336,8 @@ scipy==1.15.2 # via # scikit-image # scikit-learn +sentencepiece==0.2.1 + # via paddlex (../../../setup.py) shapely==2.0.7 # via paddlex (../../../setup.py) six==1.17.0 diff --git a/deploy/hps/server_env/requirements/gpu_dev.txt b/deploy/hps/server_env/requirements/gpu_dev.txt index 0e7e5715f8..8794c5c46c 100644 --- a/deploy/hps/server_env/requirements/gpu_dev.txt +++ b/deploy/hps/server_env/requirements/gpu_dev.txt @@ -2,5 +2,5 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --constraint=requirements/gpu.txt --no-emit-index-url --no-emit-trusted-host --output-file=requirements/gpu_dev.txt --strip-extras requirements/gpu_dev.in +# pip-compile --allow-unsafe --cert=None --client-cert=None --constraint=requirements/gpu.txt --index-url=None --no-emit-index-url --no-emit-trusted-host --output-file=requirements/gpu_dev.txt --pip-args=None --strip-extras requirements/gpu_dev.in # diff --git a/deploy/hps/server_env/requirements/gpu_hpi.txt b/deploy/hps/server_env/requirements/gpu_hpi.txt index 
00bd86d6a1..de1a847d4e 100644 --- a/deploy/hps/server_env/requirements/gpu_hpi.txt +++ b/deploy/hps/server_env/requirements/gpu_hpi.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --constraint=requirements/gpu.txt --no-emit-index-url --no-emit-trusted-host --output-file=requirements/gpu_hpi.txt --strip-extras requirements/gpu_hpi.in +# pip-compile --allow-unsafe --cert=None --client-cert=None --constraint=requirements/gpu.txt --index-url=None --no-emit-index-url --no-emit-trusted-host --output-file=requirements/gpu_hpi.txt --pip-args=None --strip-extras requirements/gpu_hpi.in # certifi==2025.1.31 # via From 2ffd6c7918ff1880a848015125d0ef90838f8ecc Mon Sep 17 00:00:00 2001 From: zhang-prog <69562787+zhang-prog@users.noreply.github.com> Date: Fri, 17 Oct 2025 18:18:43 +0800 Subject: [PATCH 15/49] [cherry-pick] fix typo (#4634) --- docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md index 5f73b5d5e0..455ccd09d1 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md @@ -939,7 +939,7 @@ paddlex --install genai-vllm-server # paddlex --install genai-sglang-server ``` -若您使用的是 NVIDIA 50 系显卡 (Compute Capacity >= 12),需要在启动服务前安装指定版本的 flash-attn: +若您使用的是 NVIDIA 50 系显卡 (Compute Capacity >= 12),需要在启动服务前安装指定版本的 FlashAttention: ``` python -m pip install flash-attn==2.8.3 From c8d21e6a1cba47d93d6f3a28a97e3089767044ae Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Mon, 20 Oct 2025 03:55:26 +0000 Subject: [PATCH 16/49] Cap langchain version --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 8c943904ba..031a8ed5a9 100644 --- a/setup.py +++ b/setup.py @@ 
-38,10 +38,10 @@ "imagesize": "", "Jinja2": "", "joblib": "", - "langchain": ">= 0.2", - "langchain-community": ">= 0.2", + "langchain": ">= 0.2, < 1.0", + "langchain-community": ">= 0.2, < 1.0", "langchain-core": "", - "langchain-openai": ">= 0.1", + "langchain-openai": ">= 0.1, < 1.0", "lxml": "", "matplotlib": "", "modelscope": ">=1.28.0", From 0d397f50c7029f3339384c8e591ec0efb3834059 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Mon, 20 Oct 2025 23:41:16 +0800 Subject: [PATCH 17/49] [Cherry-Pick] #4643 #4645 #4648 (#4649) * Fix unboundlocalerror (#4641) * Add PP-DocLayoutV2 to HPI info (#4643) * [Docs] Add missing param (#4648) * Add missing param * Remove unused extra --- .../tutorials/ocr_pipelines/PaddleOCR-VL.md | 12 ++++++--- .../common/vlm/transformers/model_utils.py | 4 +++ .../utils/hpi_model_info_collection.json | 27 +++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md index 455ccd09d1..d7d8db37bb 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md @@ -78,8 +78,8 @@ comments: true -PP-DocLayoutV2-L -推理模型/训练模型 +PP-DocLayoutV2 +推理模型/训练模型 - - / - - / - @@ -932,7 +932,7 @@ python -m venv .venv # 激活环境 source .venv/bin/activate # 安装 PaddleX -python -m pip install "paddlex[ocr]" +python -m pip install paddlex # 安装 vLLM 服务器插件 paddlex --install genai-vllm-server # 安装 SGLang 服务器插件 @@ -1167,6 +1167,12 @@ PaddleX 会将来自单张或多张输入图像中的子图分组并对服务器 否 +useDocOrientationClassify +boolean | null +请参阅产线对象中 predict 方法的 use_doc_orientation_classify 参数相关说明。 +否 + + useDocUnwarping boolean | null 请参阅产线对象中 predict 方法的 use_doc_unwarping 参数相关说明。 diff --git a/paddlex/inference/models/common/vlm/transformers/model_utils.py b/paddlex/inference/models/common/vlm/transformers/model_utils.py index aaa7600543..26e408d586 100644 --- 
a/paddlex/inference/models/common/vlm/transformers/model_utils.py +++ b/paddlex/inference/models/common/vlm/transformers/model_utils.py @@ -1607,6 +1607,8 @@ def _fuse_or_split_keys( except NotImplementedError: if convert_from_hf: raise ValueError("`convert_from_hf=True` is not supported") + else: + transpose_weight_keys = None state_dict = load_state_dict( shard_file, tp_actions if pre_tensor_parallel_split else None, @@ -1934,6 +1936,8 @@ def from_pretrained( except NotImplementedError: if convert_from_hf: raise ValueError("`convert_from_hf=True` is not supported") + else: + transpose_weight_keys = None state_dict = load_state_dict( resolved_archive_file, convert_from_hf=convert_from_hf, diff --git a/paddlex/inference/utils/hpi_model_info_collection.json b/paddlex/inference/utils/hpi_model_info_collection.json index 358c887887..78835aa809 100644 --- a/paddlex/inference/utils/hpi_model_info_collection.json +++ b/paddlex/inference/utils/hpi_model_info_collection.json @@ -1465,6 +1465,9 @@ ], "YOWO": [ "paddle" + ], + "PP-DocLayoutV2": [ + "paddle" ] }, "paddle31": { @@ -2946,6 +2949,9 @@ ], "YOWO": [ "paddle" + ], + "PP-DocLayoutV2": [ + "paddle" ] }, "paddle311": { @@ -4428,6 +4434,9 @@ ], "YOWO": [ "paddle" + ], + "PP-DocLayoutV2": [ + "paddle" ] } }, @@ -5849,6 +5858,9 @@ ], "YOWO": [ "paddle" + ], + "PP-DocLayoutV2": [ + "paddle" ] }, "paddle31": { @@ -7288,6 +7300,9 @@ ], "YOWO": [ "paddle" + ], + "PP-DocLayoutV2": [ + "paddle" ] }, "paddle311": { @@ -8726,6 +8741,9 @@ ], "YOWO": [ "paddle" + ], + "PP-DocLayoutV2": [ + "paddle" ] } }, @@ -9774,6 +9792,9 @@ ], "YOWO": [ "paddle" + ], + "PP-DocLayoutV2": [ + "paddle" ] }, "paddle31": { @@ -10835,6 +10856,9 @@ ], "YOWO": [ "paddle" + ], + "PP-DocLayoutV2": [ + "paddle" ] }, "paddle311": { @@ -11880,6 +11904,9 @@ ], "YOWO": [ "paddle" + ], + "PP-DocLayoutV2": [ + "paddle" ] } } From 6622b3b824439f5283a96a163c3fb9952d55a2ad Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Mon, 20 Oct 2025 15:43:54 +0000 Subject: 
[PATCH 18/49] Bump version to 3.3.4 --- paddlex/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/.version b/paddlex/.version index 619b537668..a0891f563f 100644 --- a/paddlex/.version +++ b/paddlex/.version @@ -1 +1 @@ -3.3.3 +3.3.4 From 677ea06a9449168cf74e26a96d049c852e46796f Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Tue, 21 Oct 2025 01:27:05 +0800 Subject: [PATCH 19/49] Fix assemble script (#4650) --- deploy/hps/sdk/scripts/assemble.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/hps/sdk/scripts/assemble.sh b/deploy/hps/sdk/scripts/assemble.sh index d06fa49daa..6c4257443c 100755 --- a/deploy/hps/sdk/scripts/assemble.sh +++ b/deploy/hps/sdk/scripts/assemble.sh @@ -8,5 +8,5 @@ docker run \ -v "$(pwd)":/workspace \ -w /workspace \ --rm \ - ccr-2vdh3abv-pub.cnc.bj.baidubce.com/linmanhui/python:3.10 \ + python:3.10 \ /bin/bash scripts/_assemble.sh "$@" From 595525459f84315fd2152ac515566d2d43f86a96 Mon Sep 17 00:00:00 2001 From: gaotingquan Date: Thu, 23 Oct 2025 07:38:14 +0000 Subject: [PATCH 20/49] bugfix: fix PaddleOCR-VL downloading from AIStudio --- paddlex/inference/utils/official_models.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/paddlex/inference/utils/official_models.py b/paddlex/inference/utils/official_models.py index 00de4707c5..aacd80e247 100644 --- a/paddlex/inference/utils/official_models.py +++ b/paddlex/inference/utils/official_models.py @@ -432,6 +432,9 @@ def get_model(self, model_name): f"Using official model ({model_name}), the model files will be automatically downloaded and saved in `{model_dir}`." ) self._download(model_name, model_dir) + logging.debug( + f"`{model_name}` model files has been download from model source: `{self.alias}`!" 
+ ) if model_name == "PaddleOCR-VL": vl_model_dir = model_dir / "PaddleOCR-VL-0.9B" @@ -531,7 +534,12 @@ class _AIStudioModelHoster(_BaseModelHoster): def _download(self, model_name, save_dir): def _clone(local_dir): - aistudio_download(repo_id=f"PaddleX/{model_name}", local_dir=local_dir) + if model_name == "PaddleOCR-VL": + aistudio_download( + repo_id=f"PaddlePaddle/{model_name}", local_dir=local_dir + ) + else: + aistudio_download(repo_id=f"PaddleX/{model_name}", local_dir=local_dir) if os.path.exists(save_dir): _clone(save_dir) @@ -586,9 +594,6 @@ def _download_from_hoster(self, hosters, model_name): if model_name in hoster.model_list: try: model_path = hoster.get_model(model_name) - logging.debug( - f"`{model_name}` model files has been download from model source: `{hoster.alias}`!" - ) return model_path except Exception as e: @@ -597,7 +602,7 @@ def _download_from_hoster(self, hosters, model_name): f"Encounter exception when download model from {hoster.alias}. No model source is available! Please check network or use local model files!" ) logging.warning( - f"Encountering exception when download model from {hoster.alias}: \n{e}, will try to download from other model sources: `hosters[idx + 1].alias`." + f"Encountering exception when download model from {hoster.alias}: \n{e}, will try to download from other model sources: `{hosters[idx + 1].alias}`." 
) return self._download_from_hoster(hosters[idx + 1 :], model_name) From c1ca6604ea96ec023377480538a524adc19f01a1 Mon Sep 17 00:00:00 2001 From: gaotingquan Date: Thu, 23 Oct 2025 11:51:33 +0000 Subject: [PATCH 21/49] fix: use cv2.imdecode to support reading files with Chinese characters in filename --- paddlex/inference/utils/io/readers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddlex/inference/utils/io/readers.py b/paddlex/inference/utils/io/readers.py index f5e292887a..58ca92c539 100644 --- a/paddlex/inference/utils/io/readers.py +++ b/paddlex/inference/utils/io/readers.py @@ -267,7 +267,9 @@ def __init__(self, flags=None): def read_file(self, in_path): """read image file from path by OpenCV""" - return cv2.imread(in_path, flags=self.flags) + with open(in_path, "rb") as f: + img_array = np.frombuffer(f.read(), np.uint8) + return cv2.imdecode(img_array, flags=self.flags) class PILImageReaderBackend(_ImageReaderBackend): From 32fe2f7fb1a91669443f6e5020955878c9b2bbdf Mon Sep 17 00:00:00 2001 From: zhouchangda Date: Thu, 23 Oct 2025 07:06:13 +0000 Subject: [PATCH 22/49] support set max_new_tokens --- .../pipelines/paddleocr_vl/pipeline.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py index cff337ae79..b4fb4903ff 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py +++ b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py @@ -249,10 +249,14 @@ def get_layout_parsing_results( vlm_block_ids.append((i, j)) drop_figures_set.update(drop_figures) + if vlm_kwargs is None: + vlm_kwargs = {} + elif vlm_kwargs.get("max_new_tokens", None) is None: + vlm_kwargs["max_new_tokens"] = 4096 + kwargs = { "use_cache": True, - "max_new_tokens": 4096, - **(vlm_kwargs or {}), + **vlm_kwargs, } vl_rec_results = list( self.vl_rec_model.predict( @@ -358,6 +362,7 @@ def predict( top_p: Optional[float] 
= None, min_pixels: Optional[int] = None, max_pixels: Optional[int] = None, + max_new_tokens: Optional[int] = None, **kwargs, ) -> PaddleOCRVLResult: """ @@ -376,6 +381,15 @@ def predict( If it's a tuple of two numbers, then they are used separately for width and height respectively. If it's None, then no unclipping will be performed. layout_merge_bboxes_mode (Optional[str], optional): The mode for merging bounding boxes. Defaults to None. + use_queues (Optional[bool], optional): Whether to use queues. Defaults to None. + prompt_label (Optional[Union[str, None]], optional): The label of the prompt in ['ocr', 'formula', 'table', 'chart']. Defaults to None. + format_block_content (Optional[bool]): Whether to format the block content. Default is None. + repetition_penalty (Optional[float]): The repetition penalty parameter used for VL model sampling. Default is None. + temperature (Optional[float]): Temperature parameter used for VL model sampling. Default is None. + top_p (Optional[float]): Top-p parameter used for VL model sampling. Default is None. + min_pixels (Optional[int]): The minimum number of pixels allowed when the VL model preprocesses images. Default is None. + max_pixels (Optional[int]): The maximum number of pixels allowed when the VL model preprocesses images. Default is None. + max_new_tokens (Optional[int]): The maximum number of new tokens. Default is None. **kwargs (Any): Additional settings to extend functionality. 
Returns: @@ -499,6 +513,7 @@ def _process_vlm(results_cv): "top_p": top_p, "min_pixels": min_pixels, "max_pixels": max_pixels, + "max_new_tokens": max_new_tokens, }, ) ) From f6bb816a22e4ec34d491a4ffb95b51e0dbc79032 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 23 Oct 2025 22:34:39 +0800 Subject: [PATCH 23/49] Remove broken quantization_config logic (#4654) --- .../vlm/transformers/configuration_utils.py | 20 ---- .../common/vlm/transformers/model_utils.py | 108 ++---------------- 2 files changed, 7 insertions(+), 121 deletions(-) diff --git a/paddlex/inference/models/common/vlm/transformers/configuration_utils.py b/paddlex/inference/models/common/vlm/transformers/configuration_utils.py index 5936c41717..80845f4bf4 100644 --- a/paddlex/inference/models/common/vlm/transformers/configuration_utils.py +++ b/paddlex/inference/models/common/vlm/transformers/configuration_utils.py @@ -823,11 +823,6 @@ def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "PretrainedConfig": ) to_remove = [] for key, value in kwargs.items(): - if key == "quantization_config" and isinstance(value, Dict): - for q_key in value: - setattr(config.quantization_config, q_key, value[q_key]) - to_remove.append(key) - continue if hasattr(config, key): setattr(config, key, value) if key != "dtype": @@ -889,11 +884,6 @@ def to_diff_dict(self, saving_file=False) -> Dict[str, Any]: # only serialize values that differ from the default config for key, value in config_dict.items(): - if key == "quantization_config": - quantization_diff_dict = self.quantization_config.to_diff_dict() - if len(quantization_diff_dict) > 0: - serializable_config_dict[key] = quantization_diff_dict - continue if ( key not in default_config_dict or key == "paddlenlp_version" @@ -942,16 +932,6 @@ def to_dict(self, saving_file=False) -> Dict[str, Any]: if key in self._unsavable_keys: output.pop(key) - if hasattr(self, "quantization_config"): - output["quantization_config"] = ( - self.quantization_config.to_dict() - if 
not isinstance(self.quantization_config, dict) - else self.quantization_config - ) - - # pop the `_pre_quantization_dtype` as torch.dtypes are not serializable. - _ = output.pop("_pre_quantization_dtype", None) - return output def update(self, config_dict: Dict[str, Any]): diff --git a/paddlex/inference/models/common/vlm/transformers/model_utils.py b/paddlex/inference/models/common/vlm/transformers/model_utils.py index 26e408d586..2cf13b30ae 100644 --- a/paddlex/inference/models/common/vlm/transformers/model_utils.py +++ b/paddlex/inference/models/common/vlm/transformers/model_utils.py @@ -258,8 +258,6 @@ def load_state_dict( tensor_parallel_split_mapping, fliter_dict_keys, "expected", - quantization_linear_list=None, - quantization_config=None, dtype=None, return_numpy=False, convert_from_hf=convert_from_hf, @@ -631,34 +629,6 @@ def set_inference_config(cls, config, predictor_args, **kwargs): config.weightonly_group_size = predictor_args.weightonly_group_size config.weight_block_size = predictor_args.weight_block_size config.moe_quant_type = predictor_args.moe_quant_type - if config.quantization_config.quant_method is not None: - predictor_args.weight_block_size = ( - config.quantization_config.weight_block_size - ) - config.weight_block_size = predictor_args.weight_block_size - - if config.quantization_config.quant_type is not None: - if predictor_args.mode == "dynamic": - predictor_args.quant_type = config.quantization_config.quant_type - config.quant_type = config.quantization_config.quant_type - if "c8" in config.quant_type: - predictor_args.cachekv_int8_type = "static" - if predictor_args.mode == "dynamic": - config.cachekv_int8_type = "static" - - if predictor_args.mode == "dynamic": - ptq_multicards_num = 0 - if os.path.exists(config.model_name_or_path): - prefix = "act_scales_" - for filename in os.listdir(config.model_name_or_path): - if filename.startswith(prefix): - ptq_multicards_num += 1 - - logging.info( - f"PTQ from {ptq_multicards_num} cards, so we 
will not split" - ) - if ptq_multicards_num > 1: - config.single_card_ptq = False if predictor_args.block_attn: config.block_size = predictor_args.block_size @@ -1323,45 +1293,6 @@ def _load_pretrained_model( ".".join([prefix, s]) for s in quantization_linear_list ] - # Weight quantization if not yet quantized & update loaded_keys - if ( - hasattr(config, "quantization_config") - and config.quantization_config.is_weight_quantize() - ): - try: - from ..quantization.quantization_utils import ( - convert_to_quantize_state_dict, - update_loaded_state_dict_keys, - ) - except ImportError: - raise ImportError( - "Quantization features require `paddlepaddle >= 2.5.2`" - ) - if state_dict is not None: - state_dict = convert_to_quantize_state_dict( - state_dict, - quantization_linear_list, - config.quantization_config, - dtype, - ) - loaded_keys = [k for k in state_dict.keys()] - else: - loaded_keys = update_loaded_state_dict_keys( - loaded_keys, quantization_linear_list, config.quantization_config - ) - if keep_in_fp32_modules is None: - keep_in_fp32_modules = ( - ["quant_scale"] - if config.quantization_config.weight_quantize_algo in ["nf4", "fp4"] - else None - ) - else: - keep_in_fp32_modules = ( - keep_in_fp32_modules + ["quant_scale"] - if config.quantization_config.weight_quantize_algo in ["nf4", "fp4"] - else keep_in_fp32_modules - ) - missing_keys = list(set(expected_keys) - set(loaded_keys)) unexpected_keys = list(set(loaded_keys) - set(expected_keys)) @@ -1525,27 +1456,12 @@ def _fuse_or_split_keys( ignore_mismatched_sizes, ) - if ( - hasattr(config, "quantization_config") - and config.quantization_config.is_weight_quantize() - ): - error_msgs = _load_state_dict_into_meta_model( - model_to_load, - state_dict, - loaded_keys, - start_prefix, - expected_keys, - dtype=dtype, - is_safetensors=is_safetensors, - keep_in_fp32_modules=keep_in_fp32_modules, - ) - else: - error_msgs = _load_state_dict_into_model( - model_to_load, - state_dict, - start_prefix, - 
convert_from_hf=convert_from_hf, - ) + error_msgs = _load_state_dict_into_model( + model_to_load, + state_dict, + start_prefix, + convert_from_hf=convert_from_hf, + ) else: # Sharded checkpoint or whole but low_cpu_mem_usage==True @@ -1600,8 +1516,6 @@ def _fuse_or_split_keys( if k[-1] in tp_actions: fuse_actions.pop(k[-1], None) - if config.quantization_config.is_weight_quantize(): - filter_dict_keys = None try: transpose_weight_keys = model.get_transpose_weight_keys() except NotImplementedError: @@ -1630,14 +1544,6 @@ def _fuse_or_split_keys( missing_keys = list(set(missing_keys) - set(new_keys)) unexpected_keys = list(set(unexpected_keys) - set(fused_keys)) - if config.quantization_config.is_weight_quantize(): - state_dict = convert_to_quantize_state_dict( - state_dict, - quantization_linear_list, - config.quantization_config, - dtype, - ) - # Mismatched keys contains tuples key/shape1/shape2 of weights in the checkpoint that have a shape not # matching the weights in the model. mismatched_keys += _find_mismatched_keys( @@ -1664,7 +1570,7 @@ def _fuse_or_split_keys( ) logging.info("Converted state_dict to Tensor Parallel Format") - if low_cpu_mem_usage or config.quantization_config.is_weight_quantize(): + if low_cpu_mem_usage: new_error_msgs = _load_state_dict_into_meta_model( model_to_load, state_dict, From 803bdd105e21d5ff92479ea841f7bba1165b25bf Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 23 Oct 2025 22:35:29 +0800 Subject: [PATCH 24/49] PaddleOCR-VL supports FP32 (#4658) --- .../models/common/vlm/transformers/model_utils.py | 10 +++++++--- paddlex/inference/models/doc_vlm/predictor.py | 11 ++--------- paddlex/inference/utils/misc.py | 14 ++++++++++++++ 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/paddlex/inference/models/common/vlm/transformers/model_utils.py b/paddlex/inference/models/common/vlm/transformers/model_utils.py index 2cf13b30ae..e55877e296 100644 --- a/paddlex/inference/models/common/vlm/transformers/model_utils.py 
+++ b/paddlex/inference/models/common/vlm/transformers/model_utils.py @@ -275,7 +275,7 @@ def _load_state_dict_into_model( model_to_load, state_dict, start_prefix, convert_from_hf ): # torch will cast dtype in load_state_dict, but paddle strictly check dtype - _convert_state_dict_dtype_and_shape(state_dict, model_to_load) + _convert_state_dict_dtype_and_shape(state_dict, model_to_load, convert_from_hf) error_msgs = [] @@ -305,12 +305,16 @@ def _load_state_dict_into_model( return error_msgs -def _convert_state_dict_dtype_and_shape(state_dict, model_to_load): +def _convert_state_dict_dtype_and_shape(state_dict, model_to_load, convert_from_hf): # convert the dtype of state dict def is_0d_or_1d(tensor): return len(tensor.shape) == 0 or list(tensor.shape) == [1] - for key, value in model_to_load.state_dict().items(): + if convert_from_hf: + model_state_dict = model_to_load.get_hf_state_dict() + else: + model_state_dict = model_to_load.state_dict() + for key, value in model_state_dict.items(): if key in list(state_dict.keys()): if isinstance(state_dict[key], np.ndarray): raise ValueError( diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py index 9d252b454d..1eec92b734 100644 --- a/paddlex/inference/models/doc_vlm/predictor.py +++ b/paddlex/inference/models/doc_vlm/predictor.py @@ -28,8 +28,8 @@ from ....utils import logging from ....utils.deps import require_genai_client_plugin from ....utils.device import TemporaryDeviceChanger -from ....utils.env import get_device_type from ...common.batch_sampler import DocVLMBatchSampler +from ...utils.misc import is_bfloat16_available from ..base import BasePredictor from .result import DocVLMResult @@ -53,15 +53,8 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) if self._use_local_model: - import paddle - self.device = kwargs.get("device", None) - self.dtype = ( - "bfloat16" - if ("npu" in get_device_type() or paddle.amp.is_bfloat16_supported()) - and 
(self.device is None or "cpu" not in self.device) - else "float32" - ) + self.dtype = "bfloat16" if is_bfloat16_available(self.device) else "float32" self.infer, self.processor = self._build(**kwargs) diff --git a/paddlex/inference/utils/misc.py b/paddlex/inference/utils/misc.py index 02832d1644..9226bdd932 100644 --- a/paddlex/inference/utils/misc.py +++ b/paddlex/inference/utils/misc.py @@ -12,9 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +from ...utils.device import get_default_device, parse_device +from ...utils.env import get_device_type + def is_mkldnn_available(): # XXX: Not sure if this is the best way to check if MKL-DNN is available from paddle.inference import Config return hasattr(Config, "set_mkldnn_cache_capacity") + + +def is_bfloat16_available(device): + import paddle.amp + + if device is None: + device = get_default_device() + device_type, _ = parse_device(device) + return ( + "npu" in get_device_type() or paddle.amp.is_bfloat16_supported() + ) and device_type in ("gpu", "npu", "xpu", "mlu", "dcu") From b2ebed2ae7cb53904c5c4763d5fce3b89a55e4e7 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Thu, 23 Oct 2025 14:49:00 +0000 Subject: [PATCH 25/49] Bump version to 3.3.5 --- paddlex/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/.version b/paddlex/.version index a0891f563f..fa7adc7ac7 100644 --- a/paddlex/.version +++ b/paddlex/.version @@ -1 +1 @@ -3.3.4 +3.3.5 From 406d84dd668dbdb424ce3085d46776d85242a3a8 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Fri, 24 Oct 2025 21:33:38 +0800 Subject: [PATCH 26/49] PaddleOCR-VL supports CPU and CUDA 11 (#4666) --- .../doc_vlm/modeling/paddleocr_vl/_config.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py index 6c563a0cda..28971e626c 
100644 --- a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py @@ -26,6 +26,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from ......utils.device import parse_device +from ......utils.env import get_paddle_cuda_version from ....common.vlm.transformers import PretrainedConfig @@ -120,6 +122,8 @@ def __init__( vision_config=None, **kwargs, ): + import paddle + # Set default for tied embeddings if not specified. super().__init__( pad_token_id=pad_token_id, @@ -165,13 +169,13 @@ def __init__( super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs) # Currently, these configuration items are hard-coded - from ......utils.env import get_paddle_cuda_version - cuda_version = get_paddle_cuda_version() - if cuda_version and cuda_version[0] > 11: - self.fuse_rms_norm = True - else: - self.fuse_rms_norm = False + self.fuse_rms_norm = False + device_type, _ = parse_device(paddle.device.get_device()) + if device_type == "gpu": + cuda_version = get_paddle_cuda_version() + if cuda_version and cuda_version[0] > 11: + self.fuse_rms_norm = True self.use_sparse_flash_attn = True self.use_var_len_flash_attn = False self.scale_qk_coeff = 1.0 From 7905c55f059b4eb7ec80191fde8cffdb7862ac9d Mon Sep 17 00:00:00 2001 From: zhouchangda Date: Fri, 24 Oct 2025 08:06:50 +0000 Subject: [PATCH 27/49] update docs --- docs/installation/installation.en.md | 18 +- docs/installation/installation.md | 19 +- .../ocr_pipelines/PaddleOCR-VL.en.md | 1972 +++++++++++++++++ .../tutorials/ocr_pipelines/PaddleOCR-VL.md | 1319 ++++++----- 4 files changed, 2650 insertions(+), 678 deletions(-) create mode 100644 docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md diff --git a/docs/installation/installation.en.md b/docs/installation/installation.en.md index 9a2a3510a3..795d901dde 100644 --- a/docs/installation/installation.en.md +++ 
b/docs/installation/installation.en.md @@ -148,13 +148,16 @@ If your Docker version >= 19.03, please use: ```bash # For CPU -docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-cpu /bin/bash +docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-cpu /bin/bash # gpu,requires GPU driver version ≥450.80.02 (Linux) or ≥452.39 (Windows) -docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-gpu-cuda11.8-cudnn8.9-trt8.6 /bin/bash +docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda11.8-cudnn8.9-trt8.6 /bin/bash # gpu,requires GPU driver version ≥545.23.06(Linux) or ≥545.84(Windows) -docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-gpu-cuda12.6-cudnn9.5-trt10.5 /bin/bash +docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda12.6-cudnn9.5 /bin/bash + +# gpu,requires GPU driver version ≥550.xx +docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda12.9-cudnn9.9 /bin/bash ``` * If your Docker version <= 19.03 and >= 17.06, please use: @@ -162,14 +165,17 @@ docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=hos
Click Here
# For CPU
-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-cpu /bin/bash
+docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-cpu /bin/bash
 
 # For GPU
 # gpu,requires GPU driver version ≥450.80.02 (Linux) or ≥452.39 (Windows)
-nvidia-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-gpu-cuda11.8-cudnn8.9-trt8.6 /bin/bash
+nvidia-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda11.8-cudnn8.9-trt8.6 /bin/bash
 
 # gpu,requires GPU driver version ≥545.23.06(Linux) or ≥545.84(Windows)
-nvidia-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-gpu-cuda12.6-cudnn9.5-trt10.5 /bin/bash
+nvidia-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda12.6-cudnn9.5 /bin/bash
+
+# gpu,requires GPU driver version ≥550.xx
+nvidia-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda12.9-cudnn9.9 /bin/bash
 
 
diff --git a/docs/installation/installation.md b/docs/installation/installation.md index 46fbaab074..46ab4195e9 100644 --- a/docs/installation/installation.md +++ b/docs/installation/installation.md @@ -155,14 +155,17 @@ paddlex --install PaddleXXX # 例如PaddleOCR ```bash # 对于 CPU 用户 -docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-cpu /bin/bash +docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-cpu /bin/bash # 对于 GPU 用户 # GPU 版本,需显卡驱动程序版本 ≥450.80.02(Linux)或 ≥452.39(Windows) -docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-gpu-cuda11.8-cudnn8.9-trt8.6 /bin/bash +docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda11.8-cudnn8.9-trt8.6 /bin/bash # GPU 版本,需显卡驱动程序版本 ≥545.23.06(Linux)或 ≥545.84(Windows) -docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-gpu-cuda12.6-cudnn9.5-trt10.5 /bin/bash +docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda12.6-cudnn9.5 /bin/bash + +# GPU 版本,需显卡驱动程序版本 ≥550.xx +docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda12.9-cudnn9.9 /bin/bash ``` @@ -171,14 +174,16 @@ docker run --gpus all --name paddlex -v $PWD:/paddle --shm-size=8g --network=hos
点击展开
# 对于 CPU 用户
-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-cpu /bin/bash
+docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-cpu /bin/bash
 
-# 对于 GPU 用户
 # GPU 版本,需显卡驱动程序版本 ≥450.80.02(Linux)或 ≥452.39(Windows)
-nvidia-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-gpu-cuda11.8-cudnn8.9-trt8.6 /bin/bash
+nvidia-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda11.8-cudnn8.9-trt8.6 /bin/bash
 
 # GPU 版本,需显卡驱动程序版本 ≥545.23.06(Linux)或 ≥545.84(Windows)
-nvidia-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-gpu-cuda12.6-cudnn9.5-trt10.5 /bin/bash
+nvidia-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda12.6-cudnn9.5 /bin/bash
+
+# GPU 版本,需显卡驱动程序版本 ≥550.xx
+nvidia-docker run --name paddlex -v $PWD:/paddle --shm-size=8g --network=host -it ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.3.4-paddlepaddle3.2.0-gpu-cuda12.9-cudnn9.9 /bin/bash
 
 
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md new file mode 100644 index 0000000000..0df19765f1 --- /dev/null +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md @@ -0,0 +1,1972 @@ +--- +comments: true +--- + +# PaddleOCR-VL Introduction + +PaddleOCR-VL is a SOTA and resource-efficient model tailored for document parsing. Its core component is PaddleOCR-VL-0.9B, a compact yet powerful vision-language model (VLM) that integrates a NaViT-style dynamic resolution visual encoder with the ERNIE-4.5-0.3B language model to enable accurate element recognition. This innovative model efficiently supports 109 languages and excels in recognizing complex elements (e.g., text, tables, formulas, and charts), while maintaining minimal resource consumption. Through comprehensive evaluations on widely used public benchmarks and in-house benchmarks, PaddleOCR-VL achieves SOTA performance in both page-level document parsing and element-level recognition. It significantly outperforms existing solutions, exhibits strong competitiveness against top-tier VLMs, and delivers fast inference speeds. These strengths make it highly suitable for practical deployment in real-world scenarios. + + + +## 1. Environment Preparation + +Install PaddlePaddle and PaddleX: + +```shell +python -m pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/ +python -m pip install paddlex +python -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl +``` +> For Windows users, please use WSL or a Docker container. + +Running the PaddleOCR-VL has the following GPU hardware requirements: + + + + + + + + + + + + + + + + + + + + + + +
Inference MethodGPU Compute Capability
PaddlePaddle≥ 8.5
vLLM≥ 8 (RTX 3060, RTX 5070, A10, A100, ...)
+ 7 ≤ GPU Compute Capability < 8 (T4, V100, ...) Supported but may experience issues like request timeouts, OOM errors, etc. Not recommended for use. +
SGLang8 ≤ GPU Compute Capability < 12
+ +The PaddleOCR-VL currently does not support CPU or Arm architecture. Support for more hardware will be expanded based on actual requirements in the future. Stay tuned! + +## 2. Quick Start + +PaddleOCR-VL supports two usage methods: CLI command line and Python API. The CLI command line method is simpler and suitable for quickly verifying functionality, while the Python API method is more flexible and suitable for integration into existing projects. + +### 2.1 Command Line Usage + +Run a single command to quickly test the PaddleOCR-VL : + +```bash +paddlex --pipeline PaddleOCR-VL --input https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/pp_ocr_vl_demo.png + +# Use --use_doc_orientation_classify to enable document orientation classification +paddlex --pipeline PaddleOCR-VL --input ./paddleocr_vl_demo.png --use_doc_orientation_classify True + +# Use --use_doc_unwarping to enable document unwarping module +paddlex --pipeline PaddleOCR-VL --input ./paddleocr_vl_demo.png --use_doc_unwarping True + +# Use --use_layout_detection to enable layout detection +paddlex --pipeline PaddleOCR-VL --input ./paddleocr_vl_demo.png --use_layout_detection False +``` + +
Command line supports more parameters. Click to expand for detailed parameter descriptions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescriptionType
inputData to be predicted, required. +For example, the local path of an image file or PDF file: /root/data/img.jpg;Such as a URL link, for example, the network URL of an image file or PDF file:Example;Such as a local directory, which should contain the images to be predicted, for example, the local path: /root/data/(Currently, prediction for directories containing PDF files is not supported. PDF files need to be specified with a specific file path).str
save_pathSpecify the path where the inference result file will be saved. If not set, the inference results will not be saved locally.str
layout_detection_model_nameName of the layout area detection and ranking model. If not set, the default model of the production line will be used.str
layout_detection_model_dirDirectory path of the layout area detection and ranking model. If not set, the official model will be downloaded.str
layout_thresholdScore threshold for the layout model. Any value between 0-1. If not set, the default value is used, which is 0.5. +
layout_nmsWhether to use post-processing NMS for layout detection. If not set, the initialized default value will be used.bool
layout_unclip_ratioExpansion coefficient for the detection boxes of the layout area detection model.Any floating-point number greater than 0. If not set, the initialized default value will be used.float
layout_merge_bboxes_modeMerging mode for the detection boxes output by the model in layout detection. +
    +
  • large when set to large, it means that among the detection boxes output by the model, for overlapping and contained boxes, only the outermost largest box is retained, and the overlapping inner boxes are deleted;
  • +
  • small, when set to small, it means that among the detection boxes output by the model, for overlapping and contained boxes, only the innermost contained small box is retained, and the overlapping outer boxes are deleted;
  • +
  • union,no filtering is performed on the boxes, and both inner and outer boxes are retained;
+If not set, the initialized parameter value will be used. +
str
vl_rec_model_nameName of the multimodal recognition model. If not set, the default model will be used.str
vl_rec_model_dirDirectory path of the multimodal recognition model. If not set, the official model will be downloaded.str
vl_rec_backendInference backend used by the multimodal recognition model.str
vl_rec_server_urlIf the multimodal recognition model uses an inference service, this parameter is used to specify the server URL.str
vl_rec_max_concurrencyIf the multimodal recognition model uses an inference service, this parameter is used to specify the maximum number of concurrent requests.str
doc_orientation_classify_model_nameName of the document orientation classification model. If not set, the initialized default value will be used.str
doc_orientation_classify_model_dirDirectory path of the document orientation classification model. If not set, the official model will be downloaded.str
doc_unwarping_model_nameName of the text image rectification model. If not set, the initialized default value will be used.str
doc_unwarping_model_dirDirectory path of the text image rectification model. If not set, the official model will be downloaded.str
use_doc_orientation_classifyWhether to load and use the document orientation classification module. If not set, the initialized default value will be used, which is initialized toFalse.bool
use_doc_unwarpingWhether to load and use the text image rectification module. If not set, the initialized default value will be used, which is initialized to False.bool
use_layout_detectionWhether to load and use the layout area detection and ranking module. If not set, the initialized default value will be used, which is initialized to True.bool
use_chart_recognitionWhether to use the chart parsing function. If not set, the initialized default value will be used, which is initialized to False.bool
format_block_contentControls whether to format the block_content content within as Markdown. If not set, the initialized default value will be used, which defaults to initialization asFalse.bool
use_queuesUsed to control whether to enable internal queues. When set to True, data loading (such as rendering PDF pages as images), layout detection model processing, and VLM inference will be executed asynchronously in separate threads, with data passed through queues, thereby improving efficiency. This approach is particularly efficient for PDF documents with a large number of pages or directories containing a large number of images or PDF files.bool
prompt_labelThe prompt type setting for the VL model, which takes effect if and only if use_layout_detection=False.str
repetition_penaltyThe repetition penalty parameter used in VL model sampling.float
temperatureThe temperature parameter used in VL model sampling.float
top_pThe top-p parameter used in VL model sampling.float
min_pixelsThe minimum number of pixels allowed when the VL model preprocesses images.int
max_pixelsThe maximum number of pixels allowed when the VL model preprocesses images.int
deviceThe device used for inference. Supports specifying specific card numbers:
    +
  • CPU: For example,cpu indicates using the CPU for inference;
  • +
  • GPU: For example,gpu:0 indicates using the first GPU for inference;
  • +
  • NPU: For example,npu:0 indicates using the first NPU for inference;
  • +
  • XPU: For example,xpu:0 indicates using the first XPU for inference;
  • +
  • MLU: For example,mlu:0 indicates using the first MLU for inference;
  • +
  • DCU: For example,dcu:0 indicates using the first DCU for inference;
  • +
If not set, the initialized default value will be used. During initialization, the local GPU device 0 will be used preferentially. If it is not available, the CPU device will be used.
str
enable_hpiWhether to enable high-performance inference.bool
use_tensorrtWhether to enable the TensorRT subgraph engine of Paddle Inference. If the model does not support acceleration via TensorRT, acceleration will not be used even if this flag is set.
For PaddlePaddle version with CUDA 11.8, the compatible TensorRT version is 8.x (x&gt;=6). It is recommended to install TensorRT 8.6.1.6.
+
bool
precisionComputational precision, such as fp32, fp16.str
enable_mkldnnWhether to enable MKL-DNN accelerated inference. If MKL-DNN is not available or the model does not support acceleration via MKL-DNN, acceleration will not be used even if this flag is set.bool
mkldnn_cache_capacityMKL-DNN cache capacity.int
cpu_threadsThe number of threads used for inference on the CPU.int
paddlex_configThe file path for PaddleX production line configuration.str
+
+
+
+The inference result will be printed in the terminal. The default output of the PaddleOCR-VL pipeline is as follows:
+
+<details><summary>
👉Click to expand +
+ 
+{'res': {'input_path': 'paddleocr_vl_demo.png', 'page_index': None, 'model_settings': {'use_doc_preprocessor': False, 'use_layout_detection': True, 'use_chart_recognition': False, 'format_block_content': False}, 'layout_det_res': {'input_path': None, 'page_index': None, 'boxes': [{'cls_id': 6, 'label': 'doc_title', 'score': 0.9636914134025574, 'coordinate': [np.float32(131.31366), np.float32(36.450516), np.float32(1384.522), np.float32(127.984665)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9281806349754333, 'coordinate': [np.float32(585.39465), np.float32(158.438), np.float32(930.2184), np.float32(182.57469)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9840355515480042, 'coordinate': [np.float32(9.023666), np.float32(200.86115), np.float32(361.41583), np.float32(343.8828)]}, {'cls_id': 14, 'label': 'image', 'score': 0.9871416091918945, 'coordinate': [np.float32(775.50574), np.float32(200.66502), np.float32(1503.3807), np.float32(684.9304)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9801855087280273, 'coordinate': [np.float32(9.532196), np.float32(344.90594), np.float32(361.4413), np.float32(440.8244)]}, {'cls_id': 17, 'label': 'paragraph_title', 'score': 0.9708921313285828, 'coordinate': [np.float32(28.040405), np.float32(455.87976), np.float32(341.7215), np.float32(520.7117)]}, {'cls_id': 24, 'label': 'vision_footnote', 'score': 0.9002962708473206, 'coordinate': [np.float32(809.0692), np.float32(703.70044), np.float32(1488.3016), np.float32(750.5238)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9825374484062195, 'coordinate': [np.float32(8.896561), np.float32(536.54895), np.float32(361.05237), np.float32(655.8058)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9822263717651367, 'coordinate': [np.float32(8.971573), np.float32(657.4949), np.float32(362.01715), np.float32(774.625)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9767460823059082, 'coordinate': [np.float32(9.407074), np.float32(776.5216), np.float32(361.31067), np.float32(846.82874)]}, {'cls_id': 
22, 'label': 'text', 'score': 0.9868153929710388, 'coordinate': [np.float32(8.669495), np.float32(848.2543), np.float32(361.64703), np.float32(1062.8568)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9826608300209045, 'coordinate': [np.float32(8.8025055), np.float32(1063.8615), np.float32(361.46588), np.float32(1182.8524)]}, {'cls_id': 22, 'label': 'text', 'score': 0.982555627822876, 'coordinate': [np.float32(8.820602), np.float32(1184.4663), np.float32(361.66394), np.float32(1302.4507)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9584776759147644, 'coordinate': [np.float32(9.170288), np.float32(1304.2161), np.float32(361.48898), np.float32(1351.7483)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9782056212425232, 'coordinate': [np.float32(389.1618), np.float32(200.38202), np.float32(742.7591), np.float32(295.65146)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9844875931739807, 'coordinate': [np.float32(388.73303), np.float32(297.18463), np.float32(744.00024), np.float32(441.3034)]}, {'cls_id': 17, 'label': 'paragraph_title', 'score': 0.9680547714233398, 'coordinate': [np.float32(409.39468), np.float32(455.89386), np.float32(721.7174), np.float32(520.9387)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9741666913032532, 'coordinate': [np.float32(389.71606), np.float32(536.8138), np.float32(742.7112), np.float32(608.00165)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9840384721755981, 'coordinate': [np.float32(389.30988), np.float32(609.39636), np.float32(743.09247), np.float32(750.3231)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9845995306968689, 'coordinate': [np.float32(389.13272), np.float32(751.7772), np.float32(743.058), np.float32(894.8815)]}, {'cls_id': 22, 'label': 'text', 'score': 0.984852135181427, 'coordinate': [np.float32(388.83267), np.float32(896.0371), np.float32(743.58215), np.float32(1038.7345)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9804865717887878, 'coordinate': [np.float32(389.08478), np.float32(1039.9119), np.float32(742.7585), 
np.float32(1134.4897)]}, {'cls_id': 22, 'label': 'text', 'score': 0.986461341381073, 'coordinate': [np.float32(388.52643), np.float32(1135.8137), np.float32(743.451), np.float32(1352.0085)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9869391918182373, 'coordinate': [np.float32(769.8341), np.float32(775.66235), np.float32(1124.9813), np.float32(1063.207)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9822869896888733, 'coordinate': [np.float32(770.30383), np.float32(1063.938), np.float32(1124.8295), np.float32(1184.2192)]}, {'cls_id': 17, 'label': 'paragraph_title', 'score': 0.9689218997955322, 'coordinate': [np.float32(791.3042), np.float32(1199.3169), np.float32(1104.4521), np.float32(1264.6985)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9713128209114075, 'coordinate': [np.float32(770.4253), np.float32(1279.6072), np.float32(1124.6917), np.float32(1351.8672)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9236552119255066, 'coordinate': [np.float32(1153.9058), np.float32(775.5814), np.float32(1334.0654), np.float32(798.1581)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9857938885688782, 'coordinate': [np.float32(1151.5197), np.float32(799.28015), np.float32(1506.3619), np.float32(991.1156)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9820687174797058, 'coordinate': [np.float32(1151.5686), np.float32(991.91095), np.float32(1506.6023), np.float32(1110.8875)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9866049885749817, 'coordinate': [np.float32(1151.6919), np.float32(1112.1301), np.float32(1507.1611), np.float32(1351.9504)]}]}}}
+
+ + +For explanation of the result parameters, refer to [2.2 Python Script Integration](#22-python-script-integration). + +Note: The default model for the production line is relatively large, which may result in slower inference speed. It is recommended to use [inference acceleration frameworks to enhance VLM inference performance](#31-starting-the-vlm-inference-service) for faster inference. + +### 2.2 Python Script Integration + +The command line method is for quick testing and visualization. In actual projects, you usually need to integrate the model via code. You can perform pipeline inference with just a few lines of code as shown below: + +```python +from paddlex import create_pipeline + +pipeline = create_pipeline(pipeline="PaddleOCR-VL") + +output = pipeline.predict(input="./pp_ocr_vl_demo.png") + +for res in output: + res.print() ## Print the structured prediction output + res.save_to_json(save_path="output") ## Save the structured JSON result of the current image + res.save_to_markdown(save_path="output") ## Save the markdown-format result of the current image +``` + +For PDF files, each page will be processed individually and generate a separate Markdown file. 
If you want to convert the entire PDF to a single Markdown file, use the following method: + +```python +from pathlib import Path +from paddlex import create_pipeline + +pipeline = create_pipeline(pipeline="PaddleOCR-VL") + +input_file = "./your_pdf_file.pdf" +output_path = Path("./output") + +output = pipeline.predict( + input=input_file, + use_doc_orientation_classify=False, + use_doc_unwarping=False) + +markdown_list = [] +markdown_images = [] + +for res in output: + md_info = res.markdown + markdown_list.append(md_info) + markdown_images.append(md_info.get("markdown_images", {})) + +markdown_texts = pipeline.concatenate_markdown_pages(markdown_list) + +mkd_file_path = output_path / f"{Path(input_file).stem}.md" +mkd_file_path.parent.mkdir(parents=True, exist_ok=True) + +with open(mkd_file_path, "w", encoding="utf-8") as f: + f.write(markdown_texts) + +for item in markdown_images: + if item: + for path, image in item.items(): + file_path = output_path / path + file_path.parent.mkdir(parents=True, exist_ok=True) + image.save(file_path) +``` + +**Note:** + +- In the example code, the parameters `use_doc_orientation_classify` and `use_doc_unwarping` are all set to `False` by default. These indicate that document orientation classification and document image unwarping are disabled. You can manually set them to `True` if needed. + +The above Python script performs the following steps: + +
(1) Instantiate the production line object. Specific parameter descriptions are as follows: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterParameter DescriptionParameter TypeDefault Value
layout_detection_model_nameName of the layout area detection and ranking model. If set to None, the default model of the production line will be used.str|NoneNone
layout_detection_model_dirDirectory path of the layout area detection and ranking model. If set to None, the official model will be downloaded.str|NoneNone
layout_thresholdScore threshold for the layout model. +
    +
  • float: Any floating-point number between 0-1;
  • +
  • dict: {0:0.1} The key is the class ID, and the value is the threshold for that class;
  • +
  • None: If set to None, the parameter value initialized by the production line will be used.
  • +
+
float|dict|NoneNone
layout_nmsWhether to use post-processing NMS for layout detection. If set to None, the parameter value initialized by the production line will be used.bool|NoneNone
layout_unclip_ratio +Expansion coefficient for the detection box of the layout area detection model. +
    +
  • float: Any floating-point number greater than 0
  • +
  • Tuple[float,float]: The respective expansion coefficients in the horizontal and vertical directions;
  • +
  • dict: where the key of the dict is of int type, representing cls_id, and the value is oftuple type, such as{0: (1.1, 2.0)}, indicating that the center of the detection box for class 0 output by the model remains unchanged, with the width expanded by 1.1 times and the height expanded by 2.0 times;
  • +
  • None: If set to None, the parameter value initialized by the production line will be used.
  • +
+
float|Tuple[float,float]|dict|NoneNone
layout_merge_bboxes_mode
    +
Merging mode for the detection boxes output by the model in layout detection. +
    +
  • large when set to large, it means that among the detection boxes output by the model, for overlapping and contained boxes, only the outermost largest box is retained, and the overlapping inner boxes are deleted;
  • +
  • small, when set to small, it means that among the detection boxes output by the model, for overlapping and contained boxes, only the innermost contained small box is retained, and the overlapping outer boxes are deleted;
  • +
  • union,no filtering is performed on the boxes, and both inner and outer boxes are retained;
+If not set, the initialized parameter value will be used. +
str|dict|NoneNone
vl_rec_model_nameName of the multimodal recognition model. If not set, the default model will be used.str|NoneNone
vl_rec_model_dirDirectory path of the multimodal recognition model. If not set, the official model will be downloaded.str|NoneNone
vl_rec_backendInference backend used by the multimodal recognition model.str|NoneNone
vl_rec_server_urlIf the multimodal recognition model uses an inference service, this parameter is used to specify the server URL.str|NoneNone
vl_rec_max_concurrencyIf the multimodal recognition model uses an inference service, this parameter is used to specify the maximum number of concurrent requests.str|NoneNone
doc_orientation_classify_model_nameName of the document orientation classification model. If not set, the initialized default value will be used.str|NoneNone
doc_orientation_classify_model_dirDirectory path of the document orientation classification model. If not set, the official model will be downloaded.str|NoneNone
doc_unwarping_model_nameName of the text image rectification model. If not set, the initialized default value will be used.str|NoneNone
doc_unwarping_model_dirDirectory path of the text image rectification model. If not set, the official model will be downloaded.str|NoneNone
use_doc_orientation_classifyWhether to load and use the document orientation classification module. If not set, the initialized default value will be used, which is initialized to False.bool|NoneNone
use_doc_unwarpingWhether to load and use the text image rectification module. If not set, the initialized default value will be used, which is initialized to False.bool|NoneNone
use_layout_detectionWhether to load and use the layout area detection and ranking module. If not set, the initialized default value will be used, which is initialized to True.bool|NoneNone
use_chart_recognitionWhether to use the chart parsing function. If not set, the initialized default value will be used, which is initialized to False.bool|NoneNone
format_block_contentControls whether to format the block_content content as Markdown. If not set, the initialized default value will be used, which is initialized to False.bool|NoneNone
deviceThe device used for inference. Supports specifying specific card numbers:
    +
  • CPU: For example,cpu indicates using the CPU for inference;
  • +
  • GPU: For example,gpu:0 indicates using the first GPU for inference;
  • +
  • NPU: For example,npu:0 indicates using the first NPU for inference;
  • +
  • XPU: For example,xpu:0 indicates using the first XPU for inference;
  • +
  • MLU: For example,mlu:0 indicates using the first MLU for inference;
  • +
  • DCU: For example,dcu:0 indicates using the first DCU for inference;
  • +
If not set, the initialized default value will be used. During initialization, the local GPU device 0 will be used preferentially. If it is not available, the CPU device will be used.
str|NoneNone
enable_hpiWhether to enable high-performance inference.boolFalse
use_tensorrtWhether to enable the TensorRT subgraph engine of Paddle Inference. If the model does not support acceleration via TensorRT, acceleration will not be used even if this flag is set.
For PaddlePaddle version with CUDA 11.8, the compatible TensorRT version is 8.x (x&gt;=6). It is recommended to install TensorRT 8.6.1.6.
+
boolFalse
precisionComputational precision, such as fp32, fp16.str"fp32"
enable_mkldnnWhether to enable MKL-DNN accelerated inference. If MKL-DNN is not available or the model does not support acceleration via MKL-DNN, acceleration will not be used even if this flag is set.boolTrue
mkldnn_cache_capacityMKL-DNN cache capacity.int10
cpu_threadsThe number of threads used for inference on the CPU.int8
paddlex_configThe file path for PaddleX production line configuration.strNone
+
+ +
(2) Call the predict()method of the PaddleOCR-VL production line object for inference prediction. This method will return a list of results. Additionally, the production line also provides the predict_iter()Method. The two are completely consistent in terms of parameter acceptance and result return. The difference lies in that predict_iter()returns a generator, which can process and obtain prediction results step by step. It is suitable for scenarios involving large datasets or where memory conservation is desired. You can choose either of these two methods based on actual needs. Below are the parameters of the predict()method and their descriptions: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterParameter DescriptionParameter TypeDefault Value
inputData to be predicted, supporting multiple input types. Required.
    +
  • Python Var: such as numpy.ndarray representing image data
  • +
  • str: such as the local path of an image file or PDF file: /root/data/img.jpg;such as a URL link, such as the network URL of an image file or PDF file:Example;such as a local directory, which should contain the images to be predicted, such as the local path: /root/data/(Currently, prediction for directories containing PDF files is not supported. PDF files need to be specified with a specific file path)
  • +
  • list: List elements should be of the aforementioned data types, such as [numpy.ndarray, numpy.ndarray], ["/root/data/img1.jpg", "/root/data/img2.jpg"], ["/root/data1", "/root/data2"].
  • +
+
Python Var|str|list
use_doc_orientation_classifyWhether to use the document orientation classification module during inference. Setting it to None means using the instantiation parameter; otherwise, this parameter takes precedence.bool|NoneNone
use_doc_unwarpingWhether to use the text image rectification module during inference. Setting it to None means using the instantiation parameter; otherwise, this parameter takes precedence.bool|NoneNone
use_layout_detectionWhether to use the layout region detection and sorting module during inference. Setting it to None means using the instantiation parameter; otherwise, this parameter takes precedence.bool|NoneNone
use_chart_recognitionWhether to use the chart parsing module during inference. Setting it to None means using the instantiation parameter; otherwise, this parameter takes precedence.bool|NoneNone
layout_thresholdThe parameter meaning is basically the same as the instantiation parameter. Setting it to None means using the instantiation parameter; otherwise, this parameter takes precedence.float|dict|NoneNone
layout_nmsThe parameter meaning is basically the same as the instantiation parameter. Setting it to None means using the instantiation parameter; otherwise, this parameter takes precedence.bool|NoneNone
layout_unclip_ratioThe parameter meaning is basically the same as the instantiation parameter. Setting it to None means using the instantiation parameter; otherwise, this parameter takes precedence.float|Tuple[float,float]|dict|NoneNone
layout_merge_bboxes_modeThe parameter meaning is basically the same as the instantiation parameter. Setting it to None means using the instantiation parameter; otherwise, this parameter takes precedence.str|dict|NoneNone
use_queuesUsed to control whether to enable internal queues. When set to True, data loading (such as rendering PDF pages as images), layout detection model processing, and VLM inference will be executed asynchronously in separate threads, with data passed through queues, thereby improving efficiency. This approach is particularly efficient for PDF documents with many pages or directories containing a large number of images or PDF files.bool|NoneNone
prompt_labelThe prompt type setting for the VL model, which takes effect only when use_layout_detection=False. The fillable parameters are ocr, formula, table, and chart.str|NoneNone
format_block_contentThe parameter meaning is basically the same as the instantiation parameter. Setting it to None means using the instantiation parameter; otherwise, this parameter takes precedence.bool|NoneNone
repetition_penaltyThe repetition penalty parameter used for VL model sampling.float|NoneNone
temperatureTemperature parameter used for VL model sampling.float|NoneNone
top_pTop-p parameter used for VL model sampling.float|NoneNone
min_pixelsThe minimum number of pixels allowed when the VL model preprocesses images.int|NoneNone
max_pixelsThe maximum number of pixels allowed when the VL model preprocesses images.int|NoneNone
+
+
(3) Process the prediction results: The prediction result for each sample is a corresponding Result object, supporting operations such as printing, saving as an image, and saving as a json file: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodMethod DescriptionParameterParameter TypeParameter DescriptionDefault Value
print()Print results to the terminalformat_jsonboolWhether to format the output content using JSON indentation.True
indentintSpecify the indentation level to beautify the output JSON data, making it more readable. Only valid when format_json is True.4
ensure_asciiboolControl whether non- ASCII characters are escaped as Unicode. When set to True, all non- ASCII characters will be escaped; False retains the original characters. Only valid when format_json is True.False
save_to_json()Save the results as a json format filesave_pathstrThe file path for saving. When it is a directory, the saved file name will be consistent with the input file type naming.None
indentintSpecify the indentation level to beautify the output JSONdata, making it more readable. Only valid when format_jsonis True.4
ensure_asciiboolControl whether non- ASCII characters are escaped as Unicode. When set to True, all non- ASCII characters will be escaped; False retains the original characters. Only valid when format_json is True.False
save_to_img()Save the visualized images of each intermediate module in png formatsave_pathstrThe file path for saving, supporting directory or file paths.None
save_to_markdown()Save each page in an image or PDF file as a markdown format file separatelysave_pathstrThe file path for saving. When it is a directory, the saved file name will be consistent with the input file type namingNone
prettyboolWhether to beautify the markdown output results, centering charts, etc., to make the markdown rendering more aesthetically pleasing.True
show_formula_numberboolControl whether to retain formula numbers in markdown. When set to True, all formula numbers are retained; False retains only the formulasFalse
save_to_html()Save the tables in the file as html format filessave_pathstrThe file path for saving, supporting directory or file paths.None
save_to_xlsx()Save the tables in the file as xlsx format filessave_pathstrThe file path for saving, supporting directory or file paths.None
+ + +- Calling the `print()` method will print the results to the terminal. The content printed to the terminal is explained as follows: + - `input_path`: `(str)` The input path of the image or PDF to be predicted. + + - `page_index`: `(Union[int, None])` If the input is a PDF file, it indicates the current page number of the PDF; otherwise, it is `None`. + + - `model_settings`: `(Dict[str, bool])` Model parameters required for configuring PaddleOCR-VL. + - `use_doc_preprocessor`: `(bool)` Controls whether to enable the document preprocessing sub-pipeline. + - `use_layout_detection`: `(bool)` Controls whether to enable the layout detection module. + - `use_chart_recognition`: `(bool)` Controls whether to enable the chart recognition function. + - `format_block_content`: `(bool)` Controls whether to save the formatted markdown content in `JSON`. + + - `doc_preprocessor_res`: `(Dict[str, Union[List[float], str]])` A dictionary of document preprocessing results, which exists only when `use_doc_preprocessor=True`. + - `input_path`: `(str)` The image path accepted by the document preprocessing sub-pipeline. When the input is a `numpy.ndarray`, it is saved as `None`; here, it is `None`. + - `page_index`: `None`. Since the input here is a `numpy.ndarray`, the value is `None`. + - `model_settings`: `(Dict[str, bool])` Model configuration parameters for the document preprocessing sub-pipeline. + - `use_doc_orientation_classify`: `(bool)` Controls whether to enable the document image orientation classification sub-module. + - `use_doc_unwarping`: `(bool)` Controls whether to enable the text image distortion correction sub-module. + - `angle`: `(int)` The prediction result of the document image orientation classification sub-module. When enabled, it returns the actual angle value. + + - `parsing_res_list`: `(List[Dict])` A list of parsing results, where each element is a dictionary. The list order is the reading order after parsing. 
+ - `block_bbox`: `(np.ndarray)` The bounding box of the layout area. + - `block_label`: `(str)` The label of the layout area, such as `text`, `table`, etc. + - `block_content`: `(str)` The content within the layout area. + - `block_id`: `(int)` The index of the layout area, used to display the layout sorting results. + - `block_order` `(int)` The order of the layout area, used to display the layout reading order. For non-sorted parts, the default value is `None`. +- Calling the `save_to_json()` method will save the above content to the specified `save_path`. If a directory is specified, the saved path will be `save_path/{your_img_basename}_res.json`. If a file is specified, it will be saved directly to that file. Since json files do not support saving numpy arrays, the `numpy.array` types within will be converted to list form. + - `input_path`: `(str)` The input path of the image or PDF to be predicted. + + - `page_index`: `(Union[int, None])` If the input is a PDF file, it indicates the current page number of the PDF; otherwise, it is `None`. + + - `model_settings`: `(Dict[str, bool])` Model parameters required for configuring PaddleOCR-VL. + + - `use_doc_preprocessor`: `(bool)` Controls whether to enable the document preprocessing sub-pipeline. + - `use_layout_detection`: `(bool)` Controls whether to enable the layout detection module. + - `use_chart_recognition`: `(bool)` Controls whether to enable the chart recognition function. + - `format_block_content`: `(bool)` Controls whether to save the formatted markdown content in `JSON`. + + - `doc_preprocessor_res`: `(Dict[str, Union[List[float], str]])` A dictionary of document preprocessing results, which exists only when `use_doc_preprocessor=True`. + - `input_path`: `(str)` The image path accepted by the document preprocessing sub-pipeline. When the input is a `numpy.ndarray`, it is saved as `None`; here, it is `None`. + - `page_index`: `None`. Since the input here is a `numpy.ndarray`, the value is `None`. 
+ - `model_settings`: `(Dict[str, bool])` Model configuration parameters for the document preprocessing sub-pipeline. + - `use_doc_orientation_classify`: `(bool)` Controls whether to enable the document image orientation classification sub-module. + - `use_doc_unwarping`: `(bool)` Controls whether to enable the text image distortion correction sub-module. + - `angle`: `(int)` The prediction result of the document image orientation classification sub-module. When enabled, it returns the actual angle value. + + - `parsing_res_list`: `(List[Dict])` A list of parsing results, where each element is a dictionary. The list order represents the reading order after parsing. + - `block_bbox`: `(np.ndarray)` The bounding box of the layout region. + - `block_label`: `(str)` The label of the layout region, such as `text`, `table`, etc. + - `block_content`: `(str)` The content within the layout region. + - `block_id`: `(int)` The index of the layout region, used to display the layout sorting results. + - `block_order` `(int)` The order of the layout region, used to display the layout reading order. For non-sorted parts, the default value is `None`. + + +- Calling the `save_to_img()` method will save the visualization results to the specified `save_path`. If a directory is specified, visualized images for layout region detection, global OCR, layout reading order, etc., will be saved. If a file is specified, it will be saved directly to that file. (Production lines typically contain many result images, so it is not recommended to directly specify a specific file path, as multiple images will be overwritten, retaining only the last one.) +- Calling the `save_to_markdown()` method will save the converted Markdown file to the specified `save_path`. The saved file path will be `save_path/{your_img_basename}.md`. If the input is a PDF file, it is recommended to directly specify a directory; otherwise, multiple markdown files will be overwritten. 
+ +Additionally, it also supports obtaining visualized images and prediction results with results through attributes, as follows: + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributeAttribute Description
jsonObtain the prediction result in json format
imgObtain the visualized image in dict format
markdownObtain the markdown result in dict format
- The prediction result obtained through the `json` attribute is data of dict type, with relevant content consistent with that saved by calling the `save_to_json()` method. +- The prediction result returned by the `img` attribute is data of dict type. The keys are `layout_det_res`, `overall_ocr_res`, `text_paragraphs_ocr_res`, `formula_res_region1`, `table_cell_img`, and `seal_res_region1`, with corresponding values being `Image.Image` objects: used to display visualized images of layout region detection, OCR, OCR text paragraphs, formulas, tables, and seal results, respectively. If optional modules are not used, the dict only contains `layout_det_res`. +- The prediction result returned by the `markdown` attribute is data of dict type. The keys are `markdown_texts`, `markdown_images`, and `page_continuation_flags`, with corresponding values being markdown text, images displayed in Markdown (`Image.Image` objects), and a bool tuple used to identify whether the first element on the current page is the start of a paragraph and whether the last element is the end of a paragraph, respectively.
+ +## 3. Enhancing VLM Inference Performance Using Inference Acceleration Frameworks + +The inference performance under the default configuration is not fully optimized and may not meet actual production requirements. PaddleX supports improving the inference performance of VLM through inference acceleration frameworks such as vLLM and SGLang, thereby accelerating the inference speed in production lines. The usage process mainly consists of two steps: + +1. Start the VLM inference service; +2. Configure the PaddleX Pipeline to invoke the VLM inference service as a client. + +### 3.1 Starting the VLM Inference Service + +#### 3.1.1 Using Docker Images + +PaddleX provides Docker images for quickly starting the vLLM inference service. The service can be started using the following command: + +```bash +docker run \ + -it \ + --rm \ + --gpus all \ + --network host \ + ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server +``` + +The service listens on port **8080** by default. + +Parameters can be passed in when starting the container to override the default configuration, for example: + +```bash +docker run \ + -it \ + --rm \ + --gpus all \ + --network host \ + ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server \ + paddlex_genai_server --model_name PaddleOCR-VL-0.9B --host 0.0.0.0 --port 8118 --backend vllm +``` + +If you are using an NVIDIA 50 series graphics card (Compute Capability >= 12), you need to install a specific version of FlashAttention before launching the service. 
+ +```bash +docker run \ + -it \ + --rm \ + --gpus all \ + --network host \ + ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server \ + /bin/bash +python -m pip install flash-attn==2.8.3 +paddlex_genai_server --model_name PaddleOCR-VL-0.9B --backend vllm --port 8118 +``` + +#### 3.1.2 Via PaddleX CLI and Launch + +Since the inference acceleration framework may have dependency conflicts with the PaddlePaddle framework, it is recommended to install it in a virtual environment. An example is as follows: + +```bash +# Create a virtual environment +python -m venv .venv +# Activate the environment +source .venv/bin/activate +# Install PaddleX +python -m pip install "paddlex[ocr]" +# Install the vLLM server plugin +paddlex --install genai-vllm-server +# Install the SGLang server plugin +# paddlex --install genai-sglang-server +``` + +If you are using an NVIDIA 50 series graphics card (Compute Capability >= 12), you need to install a specific version of FlashAttention before launching the service. + +```bash +python -m pip install flash-attn==2.8.3 +``` + +After the installation is complete, you can start the service using the `paddlex_genai_server` command: + +```bash +paddlex_genai_server --model_name PaddleOCR-VL-0.9B --backend vllm --port 8118 +``` + +The parameters supported by this command are as follows: + +| Parameter | Description | +| ------------------ | ------------------------------------------------------------ | +| `--model_name` | Model name | +| `--model_dir` | Model directory | +| `--host` | Server hostname | +| `--port` | Server port number | +| `--backend` | Backend name, i.e., the name of the inference acceleration framework used. Options are `vllm` or `sglang`. | +| `--backend_config` | A YAML file can be specified, which contains backend configurations. | + +### 3.2 How to Use the Client + +After starting the VLM inference service, the client can invoke the service through PaddleX. 
Before use, the client plugin needs to be installed: + +```bash +paddlex --install genai-client +``` + +Next, obtain the production line configuration file: + +```bash +paddlex --get_pipeline_config PaddleOCR-VL +``` + +The default save path for the configuration file is `PaddleOCR-VL.yaml`. Modify the fields `VLRecognition.genai_config.backend` and `VLRecognition.genai_config.server_url` in the configuration file to the values corresponding to the previously launched service, for example: + +```yaml +VLRecognition: + ... + genai_config: + backend: vllm + server_url: http://127.0.0.1:8118 +``` + +After that, the modified configuration file can be used for production line invocation. For example, invoke it through the CLI: + +```bash +paddlex --pipeline PaddleOCR-VL.yaml --input paddleocr_vl_demo.png +``` + +Or call it via the Python API: + +```python +from paddlex import create_pipeline + +pipeline = create_pipeline("PaddleOCR-VL.yaml") + +for res in pipeline.predict("paddleocr_vl_demo.png"): + res.print() +``` + +### 3.3 Performance Tuning + +The default configuration is tuned on a single NVIDIA A100 and assumes exclusive client service, so it may not be suitable for other environments. If users encounter performance issues during actual use, they can try the following optimization methods. + +#### 3.3.1 Server-side Parameter Adjustment + +Different inference acceleration frameworks support different parameters. Refer to their respective official documentation to learn about available parameters and when to adjust them: + +- [vLLM Official Parameter Tuning Guide](https://docs.vllm.ai/en/latest/configuration/optimization.html) +- [SGLang Hyperparameter Tuning Documentation](https://docs.sglang.ai/advanced_features/hyperparameter_tuning.html) + +The PaddleX VLM inference service supports parameter tuning through configuration files. The following example demonstrates how to adjust the `gpu-memory-utilization` and `max-num-seqs` parameters of the vLLM server: + +1. 
Create a YAML file named `vllm_config.yaml` with the following content: + +```yaml +gpu-memory-utilization: 0.3 +max-num-seqs: 128 +``` + +2. Specify the configuration file path when starting the service: + +```bash +paddlex_genai_server --model_name PaddleOCR-VL-0.9B --backend vllm --backend_config vllm_config.yaml +``` + +If you are using a shell that supports process substitution (such as Bash), you can also pass configuration items directly when starting the service without creating a configuration file: + +```bash +paddlex_genai_server --model_name PaddleOCR-VL-0.9B --backend vllm --backend_config <(echo -e 'gpu-memory-utilization: 0.3\nmax-num-seqs: 128') +``` + +#### 3.3.2 Client-Side Parameter Adjustment + +PaddleX groups sub-images from single or multiple input images and initiates concurrent requests to the server. Therefore, the number of concurrent requests significantly impacts performance. Users can set the maximum number of concurrent requests by modifying the `VLRecognition.genai_config.max_concurrency` field in the configuration file. + +When there is a one-to-one correspondence between the client and the VLM inference service, and the server-side resources are sufficient, the number of concurrent requests can be appropriately increased to enhance performance. If the server needs to support multiple clients or has limited computational resources, the number of concurrent requests should be reduced to avoid service abnormalities caused by resource overload. + +#### 3.3.3 Recommendations for Performance Tuning on Common Hardware + +The following configurations are tailored for scenarios with a one-to-one correspondence between the client and the VLM inference service. + +**NVIDIA RTX 3060** + +- **Server-Side** + - vLLM: `gpu-memory-utilization=0.8` + +## 4. Serving + +If you need to directly apply PaddleOCR-VL in your Python project, you can refer to the example code in [2.2 Python Script Integration](#22-python-script-integration). 
+ +Additionally, PaddleX also provides a service deployment method, detailed as follows: + +### 4.1 Install Dependencies + +Run the following command to install the PaddleX serving plugin via PaddleX CLI: + +```bash +paddlex --install serving +``` + +### 4.2 Run the Server + +Run the server via PaddleX CLI: + +```bash +paddlex --serve --pipeline PaddleOCR-VL +``` + +You should see information similar to the following: + +```text +INFO: Started server process [63108] +INFO: Waiting for application startup. +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit) +``` + +If you need to adjust the configuration (such as model path, batch size, deployment device, etc.), you can specify `--pipeline` as a custom configuration file. + +The command-line options related to serving are as follows: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
NameDescription
--pipelinePaddleX pipeline registration name or pipeline configuration file path.
<td><code>--device</code></td><td>Deployment device for the pipeline. By default, a GPU will be used if available; otherwise, a CPU will be used.</td>
--hostHostname or IP address to which the server is bound. Defaults to 0.0.0.0.
--portPort number on which the server listens. Defaults to 8080.
--use_hpipIf specified, uses high-performance inference. Refer to the High-Performance Inference documentation for more information.
--hpi_configHigh-performance inference configuration. Refer to the High-Performance Inference documentation for more information.
+ +### 4.3 Client-Side Invocation + +Below are the API references for basic service-based deployment and examples of multilingual service invocation: + +
API Reference +

Main operations provided by the service:

+
    +
  • The HTTP request method is POST.
  • +
  • Both the request body and response body are JSON data (JSON objects).
  • +
  • When the request is processed successfully, the response status code is 200, and the properties of the response body are as follows:
  • +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeMeaning
logIdstringThe UUID of the request.
errorCodeintegerError code. Fixed as 0.
errorMsgstringError description. Fixed as "Success".
resultobjectOperation result.
+
    +
  • When the request is not processed successfully, the properties of the response body are as follows:
  • +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeMeaning
logIdstringThe UUID of the request.
errorCodeintegerError code. Same as the response status code.
errorMsgstringError description.
+

The main operations provided by the service are as follows:

+
    +
  • infer
  • +
+

Perform layout parsing.

+

POST /layout-parsing

+
    +
  • The properties of the request body are as follows:
  • +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeMeaningRequired
filestringThe URL of an image file or PDF file accessible to the server, or the Base64-encoded result of the content of the aforementioned file types. By default, for PDF files with more than 10 pages, only the first 10 pages will be processed.
To remove the page limit, add the following configuration to the pipeline configuration file:
 Serving:
+  extra:
+    max_num_input_imgs: null
+
Yes
fileTypeinteger|nullFile type. 0 represents a PDF file, 1 represents an image file. If this property is not present in the request body, the file type will be inferred from the URL.No
useDocOrientationClassifyboolean|nullPlease refer to the description of the use_doc_orientation_classify parameter in the predict method of the PaddleOCR-VL object.No
useDocUnwarpingboolean|nullPlease refer to the description of the use_doc_unwarping parameter in the predict method of the PaddleOCR-VL object.No
useLayoutDetectionboolean|nullPlease refer to the description of the use_layout_detection parameter in the predict method of the PaddleOCR-VL object.No
useChartRecognitionboolean|nullPlease refer to the description of the use_chart_recognition parameter in the predict method of the PaddleOCR-VL object.No
layoutThresholdnumber|object|nullPlease refer to the description of the layout_threshold parameter in the predict method of the PaddleOCR-VL object.No
layoutNmsboolean|nullPlease refer to the description of the layout_nms parameter in the predict method of the PaddleOCR-VL object.No
layoutUnclipRationumber|array|object|nullPlease refer to the description of the layout_unclip_ratio parameter in the predict method of the PaddleOCR-VL object.No
layoutMergeBboxesModestring|object|nullPlease refer to the description of the layout_merge_bboxes_mode parameter in the predict method of the PaddleOCR-VL object.No
promptLabelstring|object|nullPlease refer to the description of the prompt_label parameter in the predict method of the PaddleOCR-VL object.No
formatBlockContentboolean|nullPlease refer to the description of the format_block_content parameter in the predict method of the PaddleOCR-VL object.No
repetitionPenaltynumber|nullPlease refer to the description of the repetition_penalty parameter in the predict method of the PaddleOCR-VL object.No
temperaturenumber|nullPlease refer to the description of the temperature parameter in the predict method of the PaddleOCR-VL object.No
topPnumber|nullPlease refer to the description of the top_p parameter in the predict method of the PaddleOCR-VL object.No
minPixelsnumber|nullPlease refer to the description of the min_pixels parameter in the predict method of the PaddleOCR-VL object.No
maxPixelsnumber|nullPlease refer to the description of the max_pixels parameter in the predict method of the PaddleOCR-VL object.No
prettifyMarkdownbooleanWhether to output beautified Markdown text. The default is true.No
showFormulaNumberbooleanWhether to include formula numbers in the output Markdown text. The default is false.No
visualizeboolean|nullWhether to return visualization result images and intermediate images during the processing.
    +
  • Pass true: Return images.
  • +
  • Pass false: Do not return images.
  • +
  • If this parameter is not provided in the request body or null is passed: Follow the setting in the configuration file Serving.visualize.
  • +
+
For example, add the following field in the configuration file:
+
Serving:
+  visualize: False
Images will not be returned by default, and the default behavior can be overridden by the visualize parameter in the request body. If this parameter is not set in either the request body or the configuration file (or null is passed in the request body and the configuration file is not set), images will be returned by default.
No
+
    +
  • When the request is processed successfully, the result in the response body has the following attributes:
  • +
+ + + + + + + + + + + + + + + + + + + + +
NameTypeMeaning
layoutParsingResultsarrayLayout parsing results. The array length is 1 (for image input) or the actual number of document pages processed (for PDF input). For PDF input, each element in the array represents the result of each actual page processed in the PDF file.
dataInfoobjectInput data information.
+

Each element in layoutParsingResults is an object with the following attributes:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeMeaning
prunedResultobjectA simplified version of the res field in the JSON representation of the results generated by the predict method of the object, with the input_path and page_index fields removed.
markdownobjectMarkdown results.
outputImagesobject|nullRefer to the img property description of the prediction results. The image is in JPEG format and encoded using Base64.
inputImagestring|nullInput image. The image is in JPEG format and encoded using Base64.
+

markdown is an object with the following properties:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeMeaning
textstringMarkdown text.
imagesobjectKey-value pairs of relative paths to Markdown images and Base64-encoded images.
isStartbooleanWhether the first element on the current page is the start of a paragraph.
isEndbooleanWhether the last element on the current page is the end of a paragraph.
+
Multilingual Service Invocation Example +
+Python + +

+import base64
+import requests
+import pathlib
+
+API_URL = "http://localhost:8080/layout-parsing" # Service URL
+
+image_path = "./demo.jpg"
+
+# Encode the local image in Base64
+with open(image_path, "rb") as file:
+    image_bytes = file.read()
+    image_data = base64.b64encode(image_bytes).decode("ascii")
+
+payload = {
+    "file": image_data, # Base64-encoded file content or file URL
+    "fileType": 1, # File type, 1 indicates an image file
+}
+
+# Call the API
+response = requests.post(API_URL, json=payload)
+
+# Process the returned data from the interface
+assert response.status_code == 200
+result = response.json()["result"]
+for i, res in enumerate(result["layoutParsingResults"]):
+    print(res["prunedResult"])
+    md_dir = pathlib.Path(f"markdown_{i}")
+    md_dir.mkdir(exist_ok=True)
+    (md_dir / "doc.md").write_text(res["markdown"]["text"])
+    for img_path, img in res["markdown"]["images"].items():
+        img_path = md_dir / img_path
+        img_path.parent.mkdir(parents=True, exist_ok=True)
+        img_path.write_bytes(base64.b64decode(img))
+    print(f"Markdown document saved at {md_dir / 'doc.md'}")
+    for img_name, img in res["outputImages"].items():
+        img_path = f"{img_name}_{i}.jpg"
+        pathlib.Path(img_path).parent.mkdir(exist_ok=True)
+        with open(img_path, "wb") as f:
+            f.write(base64.b64decode(img))
+        print(f"Output image saved at {img_path}")
+
+ +
C++ + +
#include <iostream>
+#include <filesystem>
+#include <fstream>
+#include <vector>
+#include <string>
+#include "cpp-httplib/httplib.h" // https://github.com/Huiyicc/cpp-httplib
+#include "nlohmann/json.hpp" // https://github.com/nlohmann/json
+#include "base64.hpp" // https://github.com/tobiaslocker/base64
+
+namespace fs = std::filesystem;
+
+int main() {
+    httplib::Client client("localhost", 8080);
+
+    const std::string filePath = "./demo.jpg";
+
+    std::ifstream file(filePath, std::ios::binary | std::ios::ate);
+    if (!file) {
+        std::cerr << "Error opening file: " << filePath << std::endl;
+        return 1;
+    }
+
+    std::streamsize size = file.tellg();
+    file.seekg(0, std::ios::beg);
+    std::vector<char> buffer(size);
+    if (!file.read(buffer.data(), size)) {
+        std::cerr << "Error reading file." << std::endl;
+        return 1;
+    }
+
+    std::string bufferStr(buffer.data(), static_cast<size_t>(size));
+    std::string encodedFile = base64::to_base64(bufferStr);
+
+    nlohmann::json jsonObj;
+    jsonObj["file"] = encodedFile;
+    jsonObj["fileType"] = 1;
+
+    auto response = client.Post("/layout-parsing", jsonObj.dump(), "application/json");
+
+    if (response && response->status == 200) {
+        nlohmann::json jsonResponse = nlohmann::json::parse(response->body);
+        auto result = jsonResponse["result"];
+
+        if (!result.is_object() || !result.contains("layoutParsingResults")) {
+            std::cerr << "Unexpected response format." << std::endl;
+            return 1;
+        }
+
+        const auto& results = result["layoutParsingResults"];
+        for (size_t i = 0; i < results.size(); ++i) {
+            const auto& res = results[i];
+
+            if (res.contains("prunedResult")) {
+                std::cout << "Layout result [" << i << "]: " << res["prunedResult"].dump() << std::endl;
+            }
+
+            if (res.contains("outputImages") && res["outputImages"].is_object()) {
+                for (auto& [imgName, imgBase64] : res["outputImages"].items()) {
+                    std::string outputPath = imgName + "_" + std::to_string(i) + ".jpg";
+                    fs::path pathObj(outputPath);
+                    fs::path parentDir = pathObj.parent_path();
+                    if (!parentDir.empty() && !fs::exists(parentDir)) {
+                        fs::create_directories(parentDir);
+                    }
+
+                    std::string decodedImage = base64::from_base64(imgBase64.get<std::string>());
+
+                    std::ofstream outFile(outputPath, std::ios::binary);
+                    if (outFile.is_open()) {
+                        outFile.write(decodedImage.c_str(), decodedImage.size());
+                        outFile.close();
+                        std::cout << "Saved image: " << outputPath << std::endl;
+                    } else {
+                        std::cerr << "Failed to save image: " << outputPath << std::endl;
+                    }
+                }
+            }
+        }
+    } else {
+        std::cerr << "Request failed." << std::endl;
+        if (response) {
+            std::cerr << "HTTP status: " << response->status << std::endl;
+            std::cerr << "Response body: " << response->body << std::endl;
+        }
+        return 1;
+    }
+
+    return 0;
+}
+
+ +
Java + +
import okhttp3.*;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Base64;
+import java.nio.file.Paths;
+import java.nio.file.Files;
+
+public class Main {
+    public static void main(String[] args) throws IOException {
+        String API_URL = "http://localhost:8080/layout-parsing";
+        String imagePath = "./demo.jpg";
+
+        File file = new File(imagePath);
+        byte[] fileContent = java.nio.file.Files.readAllBytes(file.toPath());
+        String base64Image = Base64.getEncoder().encodeToString(fileContent);
+
+        ObjectMapper objectMapper = new ObjectMapper();
+        ObjectNode payload = objectMapper.createObjectNode();
+        payload.put("file", base64Image);
+        payload.put("fileType", 1);
+
+        OkHttpClient client = new OkHttpClient();
+        MediaType JSON = MediaType.get("application/json; charset=utf-8");
+
+        RequestBody body = RequestBody.create(JSON, payload.toString());
+
+        Request request = new Request.Builder()
+                .url(API_URL)
+                .post(body)
+                .build();
+
+        try (Response response = client.newCall(request).execute()) {
+            if (response.isSuccessful()) {
+                String responseBody = response.body().string();
+                JsonNode root = objectMapper.readTree(responseBody);
+                JsonNode result = root.get("result");
+
+                JsonNode layoutParsingResults = result.get("layoutParsingResults");
+                for (int i = 0; i < layoutParsingResults.size(); i++) {
+                    JsonNode item = layoutParsingResults.get(i);
+                    int finalI = i;
+                    JsonNode prunedResult = item.get("prunedResult");
+                    System.out.println("Pruned Result [" + i + "]: " + prunedResult.toString());
+
+                    JsonNode outputImages = item.get("outputImages");
+                    outputImages.fieldNames().forEachRemaining(imgName -> {
+                        try {
+                            String imgBase64 = outputImages.get(imgName).asText();
+                            byte[] imgBytes = Base64.getDecoder().decode(imgBase64);
+                            String imgPath = imgName + "_" + finalI + ".jpg";
+
+                            File outputFile = new File(imgPath);
+                            File parentDir = outputFile.getParentFile();
+                            if (parentDir != null && !parentDir.exists()) {
+                                parentDir.mkdirs();
+                                System.out.println("Created directory: " + parentDir.getAbsolutePath());
+                            }
+
+                            try (FileOutputStream fos = new FileOutputStream(outputFile)) {
+                                fos.write(imgBytes);
+                                System.out.println("Saved image: " + imgPath);
+                            }
+                        } catch (IOException e) {
+                            System.err.println("Failed to save image: " + e.getMessage());
+                        }
+                    });
+                }
+            } else {
+                System.err.println("Request failed with HTTP code: " + response.code());
+            }
+        }
+    }
+}
+
+ +
Go + +
package main
+
+import (
+    "bytes"
+    "encoding/base64"
+    "encoding/json"
+    "fmt"
+    "io/ioutil"
+    "net/http"
+    "os"
+    "path/filepath"
+)
+
+func main() {
+    API_URL := "http://localhost:8080/layout-parsing"
+    filePath := "./demo.jpg"
+
+    fileBytes, err := ioutil.ReadFile(filePath)
+    if err != nil {
+        fmt.Printf("Error reading file: %v\n", err)
+        return
+    }
+    fileData := base64.StdEncoding.EncodeToString(fileBytes)
+
+    payload := map[string]interface{}{
+        "file":     fileData,
+        "fileType": 1,
+    }
+    payloadBytes, err := json.Marshal(payload)
+    if err != nil {
+        fmt.Printf("Error marshaling payload: %v\n", err)
+        return
+    }
+
+    client := &http.Client{}
+    req, err := http.NewRequest("POST", API_URL, bytes.NewBuffer(payloadBytes))
+    if err != nil {
+        fmt.Printf("Error creating request: %v\n", err)
+        return
+    }
+    req.Header.Set("Content-Type", "application/json")
+
+    res, err := client.Do(req)
+    if err != nil {
+        fmt.Printf("Error sending request: %v\n", err)
+        return
+    }
+    defer res.Body.Close()
+
+    if res.StatusCode != http.StatusOK {
+        fmt.Printf("Unexpected status code: %d\n", res.StatusCode)
+        return
+    }
+
+    body, err := ioutil.ReadAll(res.Body)
+    if err != nil {
+        fmt.Printf("Error reading response: %v\n", err)
+        return
+    }
+
+    type Markdown struct {
+        Text   string            `json:"text"`
+        Images map[string]string `json:"images"`
+    }
+
+    type LayoutResult struct {
+        PrunedResult map[string]interface{} `json:"prunedResult"`
+        Markdown     Markdown               `json:"markdown"`
+        OutputImages map[string]string      `json:"outputImages"`
+        InputImage   *string                `json:"inputImage"`
+    }
+
+    type Response struct {
+        Result struct {
+            LayoutParsingResults []LayoutResult `json:"layoutParsingResults"`
+            DataInfo             interface{}    `json:"dataInfo"`
+        } `json:"result"`
+    }
+
+    var respData Response
+    if err := json.Unmarshal(body, &respData); err != nil {
+        fmt.Printf("Error parsing response: %v\n", err)
+        return
+    }
+
+    for i, res := range respData.Result.LayoutParsingResults {
+        fmt.Printf("Result %d - prunedResult: %+v\n", i, res.PrunedResult)
+
+        mdDir := fmt.Sprintf("markdown_%d", i)
+        os.MkdirAll(mdDir, 0755)
+        mdFile := filepath.Join(mdDir, "doc.md")
+        if err := os.WriteFile(mdFile, []byte(res.Markdown.Text), 0644); err != nil {
+            fmt.Printf("Error writing markdown file: %v\n", err)
+        } else {
+            fmt.Printf("Markdown document saved at %s\n", mdFile)
+        }
+
+        for path, imgBase64 := range res.Markdown.Images {
+            fullPath := filepath.Join(mdDir, path)
+            if err := os.MkdirAll(filepath.Dir(fullPath), 0755); err != nil {
+                fmt.Printf("Error creating directory for markdown image: %v\n", err)
+                continue
+            }
+            imgBytes, err := base64.StdEncoding.DecodeString(imgBase64)
+            if err != nil {
+                fmt.Printf("Error decoding markdown image: %v\n", err)
+                continue
+            }
+            if err := os.WriteFile(fullPath, imgBytes, 0644); err != nil {
+                fmt.Printf("Error saving markdown image: %v\n", err)
+            }
+        }
+
+        for name, imgBase64 := range res.OutputImages {
+            imgBytes, err := base64.StdEncoding.DecodeString(imgBase64)
+            if err != nil {
+                fmt.Printf("Error decoding output image %s: %v\n", name, err)
+                continue
+            }
+            filename := fmt.Sprintf("%s_%d.jpg", name, i)
+
+            if err := os.MkdirAll(filepath.Dir(filename), 0755); err != nil {
+                fmt.Printf("Error creating directory for output image: %v\n", err)
+                continue
+            }
+
+            if err := os.WriteFile(filename, imgBytes, 0644); err != nil {
+                fmt.Printf("Error saving output image %s: %v\n", filename, err)
+            } else {
+                fmt.Printf("Output image saved at %s\n", filename)
+            }
+        }
+    }
+}
+
+ +
C# + +
using System;
+using System.IO;
+using System.Net.Http;
+using System.Text;
+using System.Threading.Tasks;
+using Newtonsoft.Json.Linq;
+
+class Program
+{
+    static readonly string API_URL = "http://localhost:8080/layout-parsing";
+    static readonly string inputFilePath = "./demo.jpg";
+
+    static async Task Main(string[] args)
+    {
+        var httpClient = new HttpClient();
+
+        byte[] fileBytes = File.ReadAllBytes(inputFilePath);
+        string fileData = Convert.ToBase64String(fileBytes);
+
+        var payload = new JObject
+        {
+            { "file", fileData },
+            { "fileType", 1 }
+        };
+        var content = new StringContent(payload.ToString(), Encoding.UTF8, "application/json");
+
+        HttpResponseMessage response = await httpClient.PostAsync(API_URL, content);
+        response.EnsureSuccessStatusCode();
+
+        string responseBody = await response.Content.ReadAsStringAsync();
+        JObject jsonResponse = JObject.Parse(responseBody);
+
+        JArray layoutParsingResults = (JArray)jsonResponse["result"]["layoutParsingResults"];
+        for (int i = 0; i < layoutParsingResults.Count; i++)
+        {
+            var res = layoutParsingResults[i];
+            Console.WriteLine($"[{i}] prunedResult:\n{res["prunedResult"]}");
+
+            JObject outputImages = res["outputImages"] as JObject;
+            if (outputImages != null)
+            {
+                foreach (var img in outputImages)
+                {
+                    string imgName = img.Key;
+                    string base64Img = img.Value?.ToString();
+                    if (!string.IsNullOrEmpty(base64Img))
+                    {
+                        string imgPath = $"{imgName}_{i}.jpg";
+                        byte[] imageBytes = Convert.FromBase64String(base64Img);
+
+                        string directory = Path.GetDirectoryName(imgPath);
+                        if (!string.IsNullOrEmpty(directory) && !Directory.Exists(directory))
+                        {
+                            Directory.CreateDirectory(directory);
+                            Console.WriteLine($"Created directory: {directory}");
+                        }
+
+                        File.WriteAllBytes(imgPath, imageBytes);
+                        Console.WriteLine($"Output image saved at {imgPath}");
+                    }
+                }
+            }
+        }
+    }
+}
+
+ +
Node.js + +
const axios = require('axios');
+const fs = require('fs');
+const path = require('path');
+
+const API_URL = 'http://localhost:8080/layout-parsing';
+const imagePath = './demo.jpg';
+const fileType = 1;
+
+function encodeImageToBase64(filePath) {
+  const bitmap = fs.readFileSync(filePath);
+  return Buffer.from(bitmap).toString('base64');
+}
+
+const payload = {
+  file: encodeImageToBase64(imagePath),
+  fileType: fileType
+};
+
+axios.post(API_URL, payload)
+  .then(response => {
+    const results = response.data.result.layoutParsingResults;
+    results.forEach((res, index) => {
+      console.log(`\n[${index}] prunedResult:`);
+      console.log(res.prunedResult);
+
+      const outputImages = res.outputImages;
+      if (outputImages) {
+        Object.entries(outputImages).forEach(([imgName, base64Img]) => {
+          const imgPath = `${imgName}_${index}.jpg`;
+
+          const directory = path.dirname(imgPath);
+          if (!fs.existsSync(directory)) {
+            fs.mkdirSync(directory, { recursive: true });
+            console.log(`Created directory: ${directory}`);
+          }
+
+          fs.writeFileSync(imgPath, Buffer.from(base64Img, 'base64'));
+          console.log(`Output image saved at ${imgPath}`);
+        });
+      } else {
+        console.log(`[${index}] No outputImages.`);
+      }
+    });
+  })
+  .catch(error => {
+    console.error('Error during API request:', error.message || error);
+  });
+
+ +
PHP + +
<?php
+
+$API_URL = "http://localhost:8080/layout-parsing";
+$image_path = "./demo.jpg";
+
+$image_data = base64_encode(file_get_contents($image_path));
+$payload = array("file" => $image_data, "fileType" => 1);
+
+$ch = curl_init($API_URL);
+curl_setopt($ch, CURLOPT_POST, true);
+curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
+curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: application/json'));
+curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+$response = curl_exec($ch);
+curl_close($ch);
+
+$result = json_decode($response, true)["result"]["layoutParsingResults"];
+
+foreach ($result as $i => $item) {
+    echo "[$i] prunedResult:\n";
+    print_r($item["prunedResult"]);
+
+    if (!empty($item["outputImages"])) {
+        foreach ($item["outputImages"] as $img_name => $img_base64) {
+            $output_image_path = "{$img_name}_{$i}.jpg";
+
+            $directory = dirname($output_image_path);
+            if (!is_dir($directory)) {
+                mkdir($directory, 0777, true);
+                echo "Created directory: $directory\n";
+            }
+
+            file_put_contents($output_image_path, base64_decode($img_base64));
+            echo "Output image saved at $output_image_path\n";
+        }
+    } else {
+        echo "No outputImages found for item $i\n";
+    }
+}
+?>
+
+
+
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md index d7d8db37bb..a340c74958 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md @@ -2,360 +2,343 @@ comments: true --- -# PaddleOCR-VL产线使用教程 +# PaddleOCR-VL介绍 -## PaddleOCR-VL产线介绍 +PaddleOCR-VL 是一款先进、高效的文档解析模型,专为文档中的元素识别设计。其核心组件为 PaddleOCR-VL-0.9B,这是一种紧凑而强大的视觉语言模型(VLM),它由 NaViT 风格的动态分辨率视觉编码器与 ERNIE-4.5-0.3B 语言模型组成,能够实现精准的元素识别。该模型支持 109 种语言,并在识别复杂元素(如文本、表格、公式和图表)方面表现出色,同时保持极低的资源消耗。通过在广泛使用的公开基准与内部基准上的全面评测,PaddleOCR-VL 在页级级文档解析与元素级识别均达到 SOTA 表现。它显著优于现有的基于Pipeline方案和文档解析多模态方案以及先进的通用多模态大模型,并具备更快的推理速度。这些优势使其非常适合在真实场景中落地部署。 -<待补充> + -### 1.1 模型基准测试数据 +## 1. 环境准备 -如您更考虑模型精度,请选择精度较高的模型,如您更考虑模型推理速度,请选择推理速度较快的模型,如您更考虑模型存储大小,请选择存储大小较小的模型。 +安装 PaddlePaddle 和 PaddleX: -> 推理耗时仅包含模型推理耗时,不包含前后处理耗时。 +```shell +python -m pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/ +python -m pip install paddlex +python -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl +``` +> 对于 Windows 用户,请使用 WSL 或者 Docker 进行环境搭建。 -
👉模型列表详情 -

文档图像方向分类模块:

- +运行 PaddleOCR-VL 对 GPU 硬件有以下要求: + +
- - - - - - - - + + + + - - - - - - - - - + + + + + + + + + + + +
模型模型下载链接Top-1 Acc(%)GPU推理耗时(ms)
[常规模式 / 高性能模式]
CPU推理耗时(ms)
[常规模式 / 高性能模式]
模型存储大小(MB)介绍
推理方式GPU Compute Capability
PP-LCNet_x1_0_doc_ori推理模型/训练模型99.062.62 / 0.593.24 / 1.197基于PP-LCNet_x1_0的文档图像分类模型,含有四个类别,即0度,90度,180度,270度
PaddlePaddle≥ 8.5
vLLM≥ 8 (RTX 3060,RTX 5070,A10,A100, ...)
+ 7 ≤ GPU Compute Capability < 8 (T4,V100,...)支持运行,但可能出现请求超时、OOM 等异常情况,不推荐使用 +
SGLang8 ≤ GPU Compute Capability < 12
-

文本图像矫正模块:

+ +目前 PaddleOCR-VL 暂不支持 CPU 及 Arm 架构,后续将根据实际需求扩展更多硬件支持,敬请期待! + +## 2. 快速开始 + +PaddleOCR-VL 支持 CLI 命令行方式和 Python API 两种使用方式,其中 CLI 命令行方式更简单,适合快速验证功能,而 Python API 方式更灵活,适合集成到现有项目中。 + +### 2.1 命令行方式体验 + +一行命令即可快速体验 PaddleOCR-VL 效果: + +```bash +paddlex --pipeline PaddleOCR-VL --input https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/pp_ocr_vl_demo.png + +# 通过 --use_doc_orientation_classify 指定是否使用文档方向分类模型 +paddlex --pipeline PaddleOCR-VL --input ./paddleocr_vl_demo.png --use_doc_orientation_classify True + +# 通过 --use_doc_unwarping 指定是否使用文本图像矫正模块 +paddlex --pipeline PaddleOCR-VL --input ./paddleocr_vl_demo.png --use_doc_unwarping True + +# 通过 --use_layout_detection 指定是否使用版面区域检测排序模块 +paddlex --pipeline PaddleOCR-VL --input ./paddleocr_vl_demo.png --use_layout_detection False +``` + +
命令行支持更多参数设置,点击展开以查看命令行参数的详细说明 - - - - - - + + + - - - - - - - + + + - -
模型模型下载链接CER GPU推理耗时(ms)
[常规模式 / 高性能模式]
CPU推理耗时(ms)
[常规模式 / 高性能模式]
模型存储大小(MB)介绍参数参数说明参数类型
UVDoc推理模型/训练模型0.17919.05 / 19.05- / 869.8230.3高精度文本图像矫正模型input待预测数据,必填。 +如图像文件或者PDF文件的本地路径:/root/data/img.jpg如URL链接,如图像文件或PDF文件的网络URL:示例如本地目录,该目录下需包含待预测图像,如本地路径:/root/data/(当前不支持目录中包含PDF文件的预测,PDF文件需要指定到具体文件路径)。 +str
- -

版面区域检测排序模块模型:

- - - - - - - - + + + - - - - - - - - - + + + - -
模型模型下载链接mAP(0.5)(%)GPU推理耗时(ms)
[常规模式 / 高性能模式]
CPU推理耗时(ms)
[常规模式 / 高性能模式]
模型存储大小(MB)介绍save_path指定推理结果文件保存的路径。如果不设置,推理结果将不会保存到本地。str
PP-DocLayoutV2推理模型/训练模型-- / -- / --基于RT-DETR-L在包含中英文论文、杂志、合同、书本、试卷和研报等场景的自建数据集训练的高精度版面区域定位和区域排序一体模型layout_detection_model_name版面区域检测排序模型名称。如果不设置,将会使用默认模型。str
- -

多模态识别模块模型:

- - - - - - + + + - - - - - - + + + -
模型模型下载链接Top1 Acc(%)GPU推理耗时(ms)
[常规模式 / 高性能模式]
CPU推理耗时(ms)
[常规模式 / 高性能模式]
模型存储大小(MB)layout_detection_model_dir版面区域检测排序模型的目录路径。如果不设置,将会下载官方模型。str
PaddleOCR-VL推理模型/训练模型-- / -- / --layout_threshold版面模型得分阈值。0-1 之间的任意浮点数。如果不设置,将使用初始化的默认值。 +float
- -测试环境说明: - -
    -
  • 性能测试环境 -
      -
    • 测试数据集: - -
        -
      • 文档图像方向分类模型:PaddleX 自建的数据集,覆盖证件和文档等多个场景,包含 1000 张图片。
      • -
      • 文本图像矫正模型:DocUNet
      • -
      • 版面区域检测模型:PaddleOCR 自建的版面区域分析数据集,包含中英文论文、杂志和研报等常见的 1w 张文档类型图片。
      • -
      • PP-DocLayout_plus-L:PaddleOCR 自建的版面区域检测数据集,包含中英文论文、杂志、报纸、研报、PPT、试卷、课本等 1300 张文档类型图片。
      • -
      -
    • -
    • 硬件配置: -
        -
      • GPU:NVIDIA Tesla T4
      • -
      • CPU:Intel Xeon Gold 6271C @ 2.60GHz
      • -
      -
    • -
    • 软件环境: -
        -
      • Ubuntu 20.04 / CUDA 11.8 / cuDNN 8.9 / TensorRT 8.6.1.6
      • -
      • paddlepaddle 3.0.0 / paddlex 3.0.3
      • -
      -
    • -
    -
  • -
  • 推理模式说明
  • -
- - - - - - - - - - - - - - - - - - - - - - - - -
模式GPU配置CPU配置加速技术组合
常规模式FP32精度 / 无TRT加速FP32精度 / 8线程PaddleInference
高性能模式选择先验精度类型和加速策略的最优组合FP32精度 / 8线程选择先验最优后端(Paddle/OpenVINO/TRT等)
- -
- -### 1.2 产线基准测试数据 - -
-点击展开/折叠表格 -<待补充> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -
流水线配置硬件平均推理时间 (s)峰值CPU利用率 (%)平均CPU利用率 (%)峰值主机内存 (MB)平均主机内存 (MB)峰值GPU利用率 (%)平均GPU利用率 (%)峰值设备内存 (MB)平均设备内存 (MB)
PP_StructureV3-defaultIntel 8350C + A1001.381384.60113.265781.593431.2110032.7937370.0034165.68
Intel 6271C + V1002.38608.70109.966388.913737.1910039.0826824.0024581.61
Intel 8563C + H201.36744.30112.826199.013865.7810043.8135132.0032077.12
Intel 8350C + A101.74418.50105.966138.253503.4110048.5418536.0018353.93
Intel 6271C + T43.70434.40105.456865.873595.6810071.9213970.0012668.58
layout_nms版面检测是否使用后处理NMS。如果不设置,将使用初始化的默认值。bool
layout_unclip_ratio版面区域检测模型检测框的扩张系数。 +任意大于 0 浮点数。如果不设置,将使用初始化的默认值 +float
layout_merge_bboxes_mode版面检测中模型输出的检测框的合并处理模式。 +
    +
  • large,设置为large时,表示在模型输出的检测框中,对于互相重叠包含的检测框,只保留外部最大的框,删除重叠的内部框;
  • +
  • small,设置为small,表示在模型输出的检测框中,对于互相重叠包含的检测框,只保留内部被包含的小框,删除重叠的外部框;
  • +
  • union,不进行框的过滤处理,内外框都保留;
  • +
如果不设置,将使用初始化的参数值。 +
str
vl_rec_model_name多模态识别模型名称。如果不设置,将会使用默认模型。str
vl_rec_model_dir多模态识别模型目录路径。如果不设置,将会下载官方模型。str
vl_rec_backend多模态识别模型使用的推理后端。str
vl_rec_server_url如果多模态识别模型使用推理服务,该参数用于指定服务器URL。str
vl_rec_max_concurrency如果多模态识别模型使用推理服务,该参数用于指定最大并发请求数。int
doc_orientation_classify_model_name文档方向分类模型的名称。如果不设置,将使用初始化的默认值。str
doc_orientation_classify_model_dir文档方向分类模型的目录路径。如果不设置,将会下载官方模型。str
doc_unwarping_model_name文本图像矫正模型的名称。如果不设置,将使用初始化的默认值。str
doc_unwarping_model_dir文本图像矫正模型的目录路径。如果不设置,将会下载官方模型。str
use_doc_orientation_classify是否加载并使用文档方向分类模块。如果不设置,将使用初始化的默认值,默认初始化为Falsebool
use_doc_unwarping是否加载并使用文本图像矫正模块。如果不设置,将使用初始化的默认值,默认初始化为Falsebool
use_layout_detection是否加载并使用版面区域检测排序模块。如果不设置,将使用初始化的默认值,默认初始化为Truebool
use_chart_recognition是否使用图表解析功能。如果不设置,将使用初始化的默认值,默认初始化为Falsebool
format_block_content控制是否将 block_content 中的内容格式化为Markdown格式。如果不设置,将使用初始化的默认值,默认初始化为Falsebool
use_queues用于控制是否启用内部队列。当设置为 True 时,数据加载(如将 PDF 页面渲染为图像)、版面检测模型处理以及 VLM 推理将分别在独立线程中异步执行,通过队列传递数据,从而提升效率。对于页数较多的 PDF 文档,或是包含大量图像或 PDF 文件的目录,这种方式尤其高效。bool
prompt_labelVL模型的 prompt 类型设置,当且仅当 use_layout_detection=False 时生效。str
repetition_penaltyVL模型采样使用的重复惩罚参数。float
temperatureVL模型采样使用的温度参数。float
top_pVL模型采样使用的top-p参数。float
min_pixelsVL模型预处理图像时允许的最小像素数。int
- - - - - - + + + + + + + + + + - - + + + - - + + + - - + + + - - + + + - - + + + - - + + + - - + + + + +
Pipeline configurationdescription
PP_StructureV3-default默认配置max_pixelsVL模型预处理图像时允许的最大像素数。int
device用于推理的设备。支持指定具体卡号: +
    +
  • CPU:如 cpu 表示使用 CPU 进行推理;
  • +
  • GPU:如 gpu:0 表示使用第 1 块 GPU 进行推理;
  • +
  • NPU:如 npu:0 表示使用第 1 块 NPU 进行推理;
  • +
  • XPU:如 xpu:0 表示使用第 1 块 XPU 进行推理;
  • +
  • MLU:如 mlu:0 表示使用第 1 块 MLU 进行推理;
  • +
  • DCU:如 dcu:0 表示使用第 1 块 DCU 进行推理;
  • +
如果不设置,将使用初始化的默认值,初始化时,会优先使用本地的 GPU 0号设备,如果没有,则使用 CPU 设备。 +
str
PP_StructureV3-pp默认配置基础上,开启文档图像预处理enable_hpi是否启用高性能推理。bool
PP_StructureV3-full默认配置基础上,开启文档图像预处理和图表解析use_tensorrt是否启用 Paddle Inference 的 TensorRT 子图引擎。如果模型不支持通过 TensorRT 加速,即使设置了此标志,也不会使用加速。
+对于 CUDA 11.8 版本的飞桨,兼容的 TensorRT 版本为 8.x(x>=6),建议安装 TensorRT 8.6.1.6。
+
bool
PP_StructureV3-seal默认配置基础上,开启印章文本识别precision计算精度,如 fp32、fp16。str
PP_StructureV3-chart默认配置基础上,开启文档图表解析enable_mkldnn是否启用 MKL-DNN 加速推理。如果 MKL-DNN 不可用或模型不支持通过 MKL-DNN 加速,即使设置了此标志,也不会使用加速。 +bool
PP_StructureV3-notable默认配置基础上,关闭表格识别mkldnn_cache_capacity +MKL-DNN 缓存容量。 +int
PP_StructureV3-noformula默认配置基础上,关闭公式识别cpu_threads在 CPU 上进行推理时使用的线程数。int
PP_StructureV3-lightweight默认配置基础上,将所有任务模型都换成最轻量版本paddlex_configPaddleX产线配置文件路径。str
+
- -* 测试环境: - * PaddlePaddle 3.1.0、CUDA 11.8、cuDNN 8.9 - * PaddleX @ develop (f1eb28e23cfa54ce3e9234d2e61fcb87c93cf407) - * Docker image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:3.1.0-gpu-cuda11.8-cudnn8.9 -* 测试数据: - * 测试数据包含表格、印章、公式、图表的280张图像。 -* 测试策略: - * 使用 20 个样本进行预热,然后对整个数据集重复 1 次以进行速度性能测试。 -* 备注: - * 由于我们没有收集NPU和XPU的设备内存数据,因此表中相应位置的数据标记为N/A。 - -## 2. 快速开始 - -PaddleX 所提供的模型产线均可以快速体验效果,你可以在本地使用命令行或 Python 体验通用通用版面解析v3产线的效果。 - -在本地使用通用版面解析v3产线前,请确保您已经按照[PaddleX本地安装教程](../../../installation/installation.md)完成了PaddleX的wheel包安装。如果您希望选择性安装依赖,请参考安装教程中的相关说明。该产线对应的依赖分组为 `ocr`。此外,为了使用飞桨框架读取 safetensors 格式模型,请执行如下命令安装 safetensors: - -```bash -python -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl -``` - -> 默认配置暂不支持 Compute Capability 低于 8.0 的 GPU(如 V100、RTX 3060 等)。请参阅下一节,了解如何在此类 GPU 上使用推理加速框架。 - -### 2.1 命令行方式体验 -一行命令即可快速体验版面解析产线效果,使用 [测试文件](https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/paddleocr_vl_demo.png),并将 `--input` 替换为本地路径,进行预测 - -``` -paddlex --pipeline PaddleOCR-VL \ - --input paddleocr_vl_demo.png \ - --use_doc_orientation_classify False \ - --use_doc_unwarping False \ - --save_path ./output \ - --device gpu:0 -``` - -注:PaddleX 支持多个模型托管平台,官方模型默认优先从 HuggingFace 下载。PaddleX 也支持通过环境变量 `PADDLE_PDX_MODEL_SOURCE` 设置优先使用的托管平台,目前支持 `huggingface`、`aistudio`、`bos`、`modelscope`,如优先使用 `bos`:`PADDLE_PDX_MODEL_SOURCE="bos"`; - -相关的参数说明可以参考[2.2.2 Python脚本方式集成](#222-python脚本方式集成)中的参数说明。支持同时指定多个设备以进行并行推理,详情请参考 [产线并行推理](../../instructions/parallel_inference.md#指定多个推理设备)。 - -运行后,会将结果打印到终端上,结果如下: +运行结果会被打印到终端上,默认配置的 PaddleOCR-VL 的运行结果如下:
👉点击展开 -

-{'res': {'input_path': 'paddleocr_vl_demo.png', 'page_index': None, 'model_settings': {'use_doc_preprocessor': False, 'use_seal_recognition': False, 'use_table_recognition': True, 'use_formula_recognition': True, 'use_chart_recognition': False, 'use_region_detection': True}, 'parsing_res_list': [{'block_label': 'doc_title', 'block_content': '助力双方交往搭建友谊桥梁', 'block_bbox': [133, 36, 1379, 123], 'block_id': 0, 'block_order': 1}, {'block_label': 'text', 'block_content': '本报记者沈小晓任彦黄培昭', 'block_bbox': [584, 159, 927, 179], 'block_id': 1, 'block_order': 2}, {'block_label': 'image', 'block_content': '', 'block_bbox': [774, 201, 1502, 685], 'block_id': 2, 'block_order': None}, {'block_label': 'figure_title', 'block_content': '在厄立特里亚不久前举办的第六届中国风筝文化节上,当地小学生体验风筝制作。中国驻厄立特里亚大使馆供图', 'block_bbox': [808, 704, 1484, 747], 'block_id': 3, 'block_order': None}, {'block_label': 'text', 'block_content': '身着中国传统民族服装的厄立特里亚青年依次登台表演中国民族舞、现代舞、扇子舞等,曼妙的舞姿赢得现场观众阵阵掌声。这是日前危立特里亚高等教育与研究院孔子学院(以下简称“厄特孔院")举办“喜迎新年"中国歌舞比赛的场景。\n', 'block_bbox': [9, 201, 358, 338], 'block_id': 4, 'block_order': 3}, {'block_label': 'text', 'block_content': '中国和厄立特里亚传统友谊深厚。近年来,在高质量共建“一带一路”框架下,中厄两国人文交流不断深化,互利合作的民意基础日益深厚。\n', 'block_bbox': [9, 345, 358, 435], 'block_id': 5, 'block_order': 4}, {'block_label': 'paragraph_title', 'block_content': '“学好中文,我们的未来不是梦”\n', 'block_bbox': [28, 456, 339, 514], 'block_id': 6, 'block_order': 5}, {'block_label': 'text', 'block_content': '“鲜花曾告诉我你怎样走过,大地知道你心中的每一个角落……"厄立特里亚阿斯马拉大学综合楼二层,一阵优美的歌声在走廊里回响。循着熟悉的旋律轻轻推开一间教室的门,学生们正跟着老师学唱中文歌曲《同一首歌》。', 'block_bbox': [9, 536, 358, 651], 'block_id': 7, 'block_order': 6}, {'block_label': 'text', 'block_content': '这是厄特孔院阿斯马拉大学教学点的一节中文歌曲课。为了让学生们更好地理解歌词大意,老师尤斯拉·穆罕默德萨尔·侯赛因逐字翻译和解释歌词。随着伴奏声响起,学生们边唱边随着节拍摇动身体,现场气氛热烈。', 'block_bbox': [9, 658, 359, 770], 'block_id': 8, 'block_order': 7}, {'block_label': 'text', 'block_content': '“这是中文歌曲初级班,共有32人。学生大部分来自首都阿斯马拉的中小学,年龄最小的仅有6岁。”尤斯拉告诉记者。', 'block_bbox': [10, 776, 359, 842], 'block_id': 9, 'block_order': 8}, {'block_label': 
'text', 'block_content': '尤斯拉今年23岁,是厄立特里亚一所公立学校的艺术老师。她12岁开始在厄特孔院学习中文,在2017年第十届“汉语桥"世界中学生中文比赛中获得厄立特里亚赛区第一名,并和同伴代表厄立特里亚前往中国参加决赛,获得团体优胜奖。2022年起,尤斯拉开始在厄特孔院兼职教授中文歌曲,每周末两个课时。“中国文化博大精深,我希望我的学生们能够通过中文歌曲更好地理解中国文化。”她说。', 'block_bbox': [9, 848, 358, 1057], 'block_id': 10, 'block_order': 9}, {'block_label': 'text', 'block_content': '“姐姐,你想去中国吗?”“非常想!我想去看故宫、爬长城。”尤斯拉的学生中有一对能歌善舞的姐妹,姐姐露娅今年15岁,妹妹莉娅14岁,两人都已在厄特孔院学习多年,中文说得格外流利。\n', 'block_bbox': [9, 1064, 358, 1177], 'block_id': 11, 'block_order': 10}, {'block_label': 'text', 'block_content': '露娅对记者说:“这些年来,怀着对中文和中国文化的热爱,我们姐妹俩始终相互鼓励,一起学习。我们的中文一天比一天好,还学会了中文歌和中国舞。我们一定要到中国去。学好中文,我们的未来不是梦!”', 'block_bbox': [8, 1184, 358, 1297], 'block_id': 12, 'block_order': 11}, {'block_label': 'text', 'block_content': '据厄特孔院中方院长黄鸣飞介绍,这所孔院成立于2013年3月,由贵州财经大学和', 'block_bbox': [10, 1304, 358, 1346], 'block_id': 13, 'block_order': 12}, {'block_label': 'text', 'block_content': '厄立特里亚高等教育与研究院合作建立,开设了中国语言课程和中国文化课程,注册学生2万余人次。10余年来,厄特孔院已成为当地民众了解中国的一扇窗口。', 'block_bbox': [388, 200, 740, 290], 'block_id': 14, 'block_order': 13}, {'block_label': 'text', 'block_content': '黄鸣飞表示,随着来学习中文的人日益增多,阿斯马拉大学教学点已难以满足教学需要。2024年4月,由中企蜀道集团所属四川路桥承建的孔院教学楼项目在阿斯马拉开工建设,预计今年上半年竣工,建成后将为危特孔院提供全新的办学场地。\n', 'block_bbox': [389, 297, 740, 435], 'block_id': 15, 'block_order': 14}, {'block_label': 'paragraph_title', 'block_content': '“在中国学习的经历让我看到更广阔的世界”', 'block_bbox': [409, 456, 718, 515], 'block_id': 16, 'block_order': 15}, {'block_label': 'text', 'block_content': '多年来,厄立特里亚广大赴华留学生和培训人员积极投身国家建设,成为助力该国发展的人才和厄中友好的见证者和推动者。', 'block_bbox': [389, 537, 740, 603], 'block_id': 17, 'block_order': 16}, {'block_label': 'text', 'block_content': '在厄立特里亚全国妇女联盟工作的约翰娜·特韦尔德·凯莱塔就是其中一位。她曾在中华女子学院攻读硕士学位,研究方向是女性领导力与社会发展。其间,她实地走访中国多个地区,获得了观察中国社会发展的第一手资料。\n', 'block_bbox': [389, 609, 740, 745], 'block_id': 18, 'block_order': 17}, {'block_label': 'text', 'block_content': '谈起在中国求学的经历,约翰娜记忆犹新:“中国的发展在当今世界是独一无二的。沿着中国特色社会主义道路坚定前行,中国创造了发展奇迹,这一切都离不开中国共产党的领导。中国的发展经验值得许多国家学习借鉴。”\n', 'block_bbox': [389, 752, 740, 889], 
'block_id': 19, 'block_order': 18}, {'block_label': 'text', 'block_content': '正在西南大学学习的厄立特里亚博士生穆卢盖塔·泽穆伊对中国怀有深厚感情。8年前,在北京师范大学获得硕士学位后,穆卢盖塔在社交媒体上写下这样一段话:“这是我人生的重要一步,自此我拥有了一双坚固的鞋子,赋予我穿越荆棘的力量。”', 'block_bbox': [389, 896, 740, 1033], 'block_id': 20, 'block_order': 19}, {'block_label': 'text', 'block_content': '穆卢盖塔密切关注中国在经济、科技、教育等领域的发展,“中国在科研等方面的实力与日俱增。在中国学习的经历让我看到更广阔的世界,从中受益匪浅。”\n', 'block_bbox': [389, 1040, 740, 1129], 'block_id': 21, 'block_order': 20}, {'block_label': 'text', 'block_content': '23岁的莉迪亚·埃斯蒂法诺斯已在厄特孔院学习3年,在中国书法、中国画等方面表现干分优秀,在2024年厄立特里亚赛区的“汉语桥”比赛中获得一等奖。莉迪亚说:“学习中国书法让我的内心变得安宁和纯粹。我也喜欢中国的服饰,希望未来能去中国学习,把中国不同民族元素融入服装设计中,创作出更多精美作品,也把厄特文化分享给更多的中国朋友。”\n', 'block_bbox': [389, 1136, 740, 1345], 'block_id': 22, 'block_order': 21}, {'block_label': 'text', 'block_content': '“不管远近都是客人,请不用客气;相约好了在一起,我们欢迎你……”在一场中厄青年联谊活动上,四川路桥中方员工同当地大学生合唱《北京欢迎你》。厄立特里亚技术学院计算机科学与工程专业学生鲁夫塔·谢拉是其中一名演唱者,她很早便在孔院学习中文,一直在为去中国留学作准备。“这句歌词是我们两国人民友谊的生动写照。无论是投身于厄立特里亚基础设施建设的中企员工,还是在中国留学的厄立特里亚学子,两国人民携手努力,必将推动两国关系不断向前发展。”鲁夫塔说。\n', 'block_bbox': [769, 776, 1121, 1058], 'block_id': 23, 'block_order': 22}, {'block_label': 'text', 'block_content': '厄立特里亚高等教育委员会主任助理萨马瑞表示:“每年我们都会组织学生到中国访问学习,自前有超过5000名厄立特里亚学生在中国留学。学习中国的教育经验,有助于提升厄立特里亚的教育水平。”', 'block_bbox': [770, 1064, 1121, 1177], 'block_id': 24, 'block_order': 23}, {'block_label': 'paragraph_title', 'block_content': '“共同向世界展示非洲和亚洲的灿烂文明”', 'block_bbox': [790, 1200, 1102, 1259], 'block_id': 25, 'block_order': 24}, {'block_label': 'text', 'block_content': '从阿斯马拉出发,沿着蜿蜒曲折的盘山公路一路向东寻找丝路印迹。驱车两个小时,记者来到位于厄立特里亚港口城市马萨', 'block_bbox': [770, 1280, 1122, 1346], 'block_id': 26, 'block_order': 25}, {'block_label': 'text', 'block_content': '瓦的北红海省博物馆。', 'block_bbox': [1154, 776, 1331, 794], 'block_id': 27, 'block_order': 26}, {'block_label': 'text', 'block_content': '博物馆二层陈列着一个发掘自阿杜利斯古城的中国古代陶制酒器,罐身上写着“万”“和”“禅”“山”等汉字。“这件文物证明,很早以前我们就通过海上丝绸之路进行贸易往来与文化交流。这也是厄立特里亚与中国友好交往历史的有力证明。”北红海省博物馆研究与文献部负责人伊萨亚斯·特斯法兹吉说。\n', 'block_bbox': [1152, 800, 1502, 986], 'block_id': 28, 'block_order': 
27}, {'block_label': 'text', 'block_content': '厄立特里亚国家博物馆考古学和人类学研究员菲尔蒙·特韦尔德十分喜爱中国文化。他表示:“学习彼此的语言和文化,将帮助厄中两国人民更好地理解彼此,助力双方交往,搭建友谊桥梁。”\n', 'block_bbox': [1152, 992, 1502, 1106], 'block_id': 29, 'block_order': 28}, {'block_label': 'text', 'block_content': '厄立特里亚国家博物馆馆长塔吉丁·努重达姆·优素福曾多次访问中国,对中华文明的传承与创新、现代化博物馆的建设与发展印象深刻。“中国博物馆不仅有许多保存完好的文物,还充分运用先进科技手段进行展示,帮助人们更好理解中华文明。”塔吉丁说,“危立特里亚与中国都拥有悠久的文明,始终相互理解、相互尊重。我希望未来与中国同行加强合作,共同向世界展示非洲和亚洲的灿烂文明。”\n', 'block_bbox': [1151, 1112, 1502, 1346], 'block_id': 30, 'block_order': 29}], 'layout_det_res': {'input_path': None, 'page_index': None, 'boxes': [{'cls_id': 1, 'label': 'image', 'score': 0.9864752888679504, 'coordinate': [774.821, 201.05176, 1502.1008, 685.7733]}, {'cls_id': 2, 'label': 'text', 'score': 0.9859225749969482, 'coordinate': [769.8655, 776.2444, 1121.5986, 1058.4167]}, {'cls_id': 2, 'label': 'text', 'score': 0.9857110381126404, 'coordinate': [1151.98, 1112.5356, 1502.7852, 1346.3569]}, {'cls_id': 2, 'label': 'text', 'score': 0.9847239255905151, 'coordinate': [389.0322, 1136.3547, 740.2322, 1345.928]}, {'cls_id': 2, 'label': 'text', 'score': 0.9842492938041687, 'coordinate': [1152.1504, 800.1625, 1502.1265, 986.1522]}, {'cls_id': 2, 'label': 'text', 'score': 0.9840831160545349, 'coordinate': [9.158066, 848.8696, 358.5725, 1057.832]}, {'cls_id': 2, 'label': 'text', 'score': 0.9802583456039429, 'coordinate': [9.335953, 201.10046, 358.31543, 338.78876]}, {'cls_id': 2, 'label': 'text', 'score': 0.9801402688026428, 'coordinate': [389.1556, 297.4113, 740.07556, 435.41647]}, {'cls_id': 2, 'label': 'text', 'score': 0.9793564081192017, 'coordinate': [389.18976, 752.0959, 740.0832, 889.88043]}, {'cls_id': 2, 'label': 'text', 'score': 0.9793409109115601, 'coordinate': [389.02496, 896.34143, 740.7431, 1033.9465]}, {'cls_id': 2, 'label': 'text', 'score': 0.9776486754417419, 'coordinate': [8.950775, 1184.7842, 358.75067, 1297.8755]}, {'cls_id': 2, 'label': 'text', 'score': 0.9773538708686829, 'coordinate': [770.7178, 1064.5714, 1121.2249, 
1177.9928]}, {'cls_id': 2, 'label': 'text', 'score': 0.9773064255714417, 'coordinate': [389.38086, 609.7071, 740.0553, 745.3206]}, {'cls_id': 2, 'label': 'text', 'score': 0.9765821099281311, 'coordinate': [1152.0115, 992.296, 1502.4929, 1106.1166]}, {'cls_id': 2, 'label': 'text', 'score': 0.9761461019515991, 'coordinate': [9.46727, 536.993, 358.2047, 651.32025]}, {'cls_id': 2, 'label': 'text', 'score': 0.975399911403656, 'coordinate': [9.353531, 1064.3059, 358.45312, 1177.8347]}, {'cls_id': 2, 'label': 'text', 'score': 0.9730532169342041, 'coordinate': [9.932312, 345.36237, 358.03476, 435.1646]}, {'cls_id': 2, 'label': 'text', 'score': 0.9722575545310974, 'coordinate': [388.91736, 200.93637, 740.00793, 290.80692]}, {'cls_id': 2, 'label': 'text', 'score': 0.9710634350776672, 'coordinate': [389.39496, 1040.3186, 740.0091, 1129.7168]}, {'cls_id': 2, 'label': 'text', 'score': 0.9696939587593079, 'coordinate': [9.6145935, 658.1123, 359.06088, 770.0288]}, {'cls_id': 2, 'label': 'text', 'score': 0.9664148092269897, 'coordinate': [770.235, 1280.4562, 1122.0927, 1346.4742]}, {'cls_id': 2, 'label': 'text', 'score': 0.9597565531730652, 'coordinate': [389.66678, 537.5609, 740.06274, 603.17725]}, {'cls_id': 2, 'label': 'text', 'score': 0.9594324827194214, 'coordinate': [10.162949, 776.86414, 359.08307, 842.1771]}, {'cls_id': 2, 'label': 'text', 'score': 0.9484634399414062, 'coordinate': [10.402863, 1304.7743, 358.9441, 1346.3749]}, {'cls_id': 0, 'label': 'paragraph_title', 'score': 0.9476125240325928, 'coordinate': [28.159409, 456.7627, 339.5631, 514.9665]}, {'cls_id': 0, 'label': 'paragraph_title', 'score': 0.9427680969238281, 'coordinate': [790.6992, 1200.3663, 1102.3799, 1259.1647]}, {'cls_id': 0, 'label': 'paragraph_title', 'score': 0.9424256682395935, 'coordinate': [409.02832, 456.6831, 718.8154, 515.5757]}, {'cls_id': 10, 'label': 'doc_title', 'score': 0.9376171827316284, 'coordinate': [133.77905, 36.884415, 1379.6667, 123.46867]}, {'cls_id': 2, 'label': 'text', 'score': 
0.9020254015922546, 'coordinate': [584.9165, 159.1416, 927.22876, 179.01605]}, {'cls_id': 2, 'label': 'text', 'score': 0.895164430141449, 'coordinate': [1154.3364, 776.74646, 1331.8564, 794.2301]}, {'cls_id': 6, 'label': 'figure_title', 'score': 0.7892374992370605, 'coordinate': [808.9641, 704.2555, 1484.0623, 747.2296]}]}, 'overall_ocr_res': {'input_path': None, 'page_index': None, 'model_settings': {'use_doc_preprocessor': False, 'use_textline_orientation': False}, 'dt_polys': array([[[ 129,   42],
-        ...,
-        [ 129,  140]],
-
-       ...,
-
-       [[1156, 1330],
-        ...,
-        [1156, 1351]]], dtype=int16), 'text_det_params': {'limit_side_len': 736, 'limit_type': 'min', 'thresh': 0.3, 'max_side_limit': 4000, 'box_thresh': 0.6, 'unclip_ratio': 1.5}, 'text_type': 'general', 'textline_orientation_angles': array([-1, ..., -1]), 'text_rec_score_thresh': 0.0, 'return_word_box': False, 'rec_texts': ['助力双方交往', '搭建友谊桥梁', '本报记者沈小晓', '任', '彦', '黄培昭', '身着中国传统民族服装的厄立特里亚青', '厄立特里亚高等教育与研究院合作建立,开', '年依次登台表演中国民族舞、现代舞、扇子舞', '设了中国语言课程和中国文化课程,注册学', '等,曼妙的舞姿赢得现场观众阵阵掌声。这', '生2万余人次。10余年来,厄特孔院已成为', '是日前危立特里亚高等教育与研究院孔子学', '当地民众了解中国的一扇窗口。', '院(以下简称“厄特孔院")举办“喜迎新年"中国', '黄鸣飞表示,随着来学习中文的人日益', '歌舞比赛的场景。', '增多,阿斯马拉大学教学点已难以满足教学', '中国和厄立特里亚传统友谊深厚。近年', '需要。2024年4月,由中企蜀道集团所属四', '来,在高质量共建“一带一路”框架下,中厄两', '川路桥承建的孔院教学楼项目在阿斯马拉开', '国人文交流不断深化,互利合作的民意基础', '工建设,预计今年上半年竣工,建成后将为危', '日益深厚。', '特孔院提供全新的办学场地。', '“学好中文,我们的', '“在中国学习的经历', '未来不是梦”', '让我看到更广阔的世界”', '“鲜花曾告诉我你怎样走过,大地知道你', '多年来,厄立特里亚广大赴华留学生和', '心中的每一个角落……"厄立特里亚阿斯马拉', '培训人员积极投身国家建设,成为助力该国', '大学综合楼二层,一阵优美的歌声在走廊里回', '发展的人才和厄中友好的见证者和推动者。', '响。循着熟悉的旋律轻轻推开一间教室的门,', '在厄立特里亚全国妇女联盟工作的约翰', '学生们正跟着老师学唱中文歌曲《同一首歌》。', '娜·特韦尔德·凯莱塔就是其中一位。她曾在', '这是厄特孔院阿斯马拉大学教学点的一', '中华女子学院攻读硕士学位,研究方向是女', '节中文歌曲课。为了让学生们更好地理解歌', '性领导力与社会发展。其间,她实地走访中国', '词大意,老师尤斯拉·穆罕默德萨尔·侯赛因逐', '多个地区,获得了观察中国社会发展的第一', '在厄立特里亚不久前举办的第六届中国风筝文化节上,当地小学生体验风筝制作。', '字翻译和解释歌词。随着伴奏声响起,学生们', '手资料。', '中国驻厄立特里亚大使馆供图', '边唱边随着节拍摇动身体,现场气氛热烈。', '谈起在中国求学的经历,约翰娜记忆犹', '“这是中文歌曲初级班,共有32人。学', '新:“中国的发展在当今世界是独一无二的。', '“不管远近都是客人,请不用客气;相约', '瓦的北红海省博物馆。', '生大部分来自首都阿斯马拉的中小学,年龄', '沿着中国特色社会主义道路坚定前行,中国', '好了在一起,我们欢迎你……”在一场中厄青', '博物馆二层陈列着一个发掘自阿杜利', '最小的仅有6岁。”尤斯拉告诉记者。', '创造了发展奇迹,这一切都离不开中国共产党', '年联谊活动上,四川路桥中方员工同当地大', '斯古城的中国古代陶制酒器,罐身上写着', '尤斯拉今年23岁,是厄立特里亚一所公立', '的领导。中国的发展经验值得许多国家学习', '学生合唱《北京欢迎你》。厄立特里亚技术学', '“万”“和”“禅”“山”等汉字。“这件文物证', '学校的艺术老师。她12岁开始在厄特孔院学', '借鉴。”', '院计算机科学与工程专业学生鲁夫塔·谢拉', '明,很早以前我们就通过海上丝绸之路进行', '习中文,在2017年第十届“汉语桥"世界中学生', '正在西南大学学习的厄立特里亚博士生', '是其中一名演唱者,她很早便在孔院学习中', '贸易往来与文化交流。这也是厄立特里亚', '中文比赛中获得厄立特里亚赛区第一名,并和', '穆卢盖塔·泽穆伊对中国怀有深厚感情。8', '文,一直在为去中国留学作准备。“这句歌词', '与中国友好交往历史的有力证明。”北红海', 
'同伴代表厄立特里亚前往中国参加决赛,获得', '年前,在北京师范大学获得硕士学位后,穆卢', '是我们两国人民友谊的生动写照。无论是投', '省博物馆研究与文献部负责人伊萨亚斯·特', '团体优胜奖。2022年起,尤斯拉开始在厄特孔', '盖塔在社交媒体上写下这样一段话:“这是我', '身于厄立特里亚基础设施建设的中企员工,', '斯法兹吉说。', '院兼职教授中文歌曲,每周末两个课时。“中国', '人生的重要一步,自此我拥有了一双坚固的', '还是在中国留学的厄立特里亚学子,两国人', '厄立特里亚国家博物馆考古学和人类学', '文化博大精深,我希望我的学生们能够通过中', '鞋子,赋予我穿越荆棘的力量。”', '民携手努力,必将推动两国关系不断向前发', '研究员菲尔蒙·特韦尔德十分喜爱中国文', '文歌曲更好地理解中国文化。”她说。', '穆卢盖塔密切关注中国在经济、科技、教', '展。”鲁夫塔说。', '化。他表示:“学习彼此的语言和文化,将帮', '“姐姐,你想去中国吗?”“非常想!我想', '育等领域的发展,“中国在科研等方面的实力', '厄立特里亚高等教育委员会主任助理萨', '助厄中两国人民更好地理解彼此,助力双方', '去看故宫、爬长城。”尤斯拉的学生中有一对', '与日俱增。在中国学习的经历让我看到更广', '马瑞表示:“每年我们都会组织学生到中国访', '交往,搭建友谊桥梁。”', '能歌善舞的姐妹,姐姐露娅今年15岁,妹妹', '阔的世界,从中受益匪浅。”', '问学习,自前有超过5000名厄立特里亚学生', '厄立特里亚国家博物馆馆长塔吉丁·努', '莉娅14岁,两人都已在厄特孔院学习多年,', '23岁的莉迪亚·埃斯蒂法诺斯已在厄特', '在中国留学。学习中国的教育经验,有助于', '重达姆·优素福曾多次访问中国,对中华文明', '中文说得格外流利。', '孔院学习3年,在中国书法、中国画等方面表', '提升厄立特里亚的教育水平。”', '的传承与创新、现代化博物馆的建设与发展', '露娅对记者说:“这些年来,怀着对中文', '现干分优秀,在2024年厄立特里亚赛区的', '印象深刻。“中国博物馆不仅有许多保存完好', '“共同向世界展示非', '和中国文化的热爱,我们姐妹俩始终相互鼓', '“汉语桥”比赛中获得一等奖。莉迪亚说:“学', '的文物,还充分运用先进科技手段进行展示,', '励,一起学习。我们的中文一天比一天好,还', '习中国书法让我的内心变得安宁和纯粹。我', '洲和亚洲的灿烂文明”', '帮助人们更好理解中华文明。”塔吉丁说,“危', '学会了中文歌和中国舞。我们一定要到中国', '也喜欢中国的服饰,希望未来能去中国学习,', '立特里亚与中国都拥有悠久的文明,始终相', '去。学好中文,我们的未来不是梦!”', '把中国不同民族元素融入服装设计中,创作', '从阿斯马拉出发,沿着蜿蜒曲折的盘山', '互理解、相互尊重。我希望未来与中国同行', '据厄特孔院中方院长黄鸣飞介绍,这所', '出更多精美作品,也把厄特文化分享给更多', '公路一路向东寻找丝路印迹。驱车两个小', '加强合作,共同向世界展示非洲和亚洲的灿', '孔院成立于2013年3月,由贵州财经大学和', '的中国朋友。”', '时,记者来到位于厄立特里亚港口城市马萨', '烂文明。”'], 'rec_scores': array([0.99113536, ..., 0.95110023]), 'rec_polys': array([[[ 129,   42],
-        ...,
-        [ 129,  140]],
-
-       ...,
-
-       [[1156, 1330],
-        ...,
-        [1156, 1351]]], dtype=int16), 'rec_boxes': array([[ 129, ...,  140],
-       ...,
-       [1156, ..., 1351]], dtype=int16)}}}
+
+
+{'res': {'input_path': 'paddleocr_vl_demo.png', 'page_index': None, 'model_settings': {'use_doc_preprocessor': False, 'use_layout_detection': True, 'use_chart_recognition': False, 'format_block_content': False}, 'layout_det_res': {'input_path': None, 'page_index': None, 'boxes': [{'cls_id': 6, 'label': 'doc_title', 'score': 0.9636914134025574, 'coordinate': [np.float32(131.31366), np.float32(36.450516), np.float32(1384.522), np.float32(127.984665)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9281806349754333, 'coordinate': [np.float32(585.39465), np.float32(158.438), np.float32(930.2184), np.float32(182.57469)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9840355515480042, 'coordinate': [np.float32(9.023666), np.float32(200.86115), np.float32(361.41583), np.float32(343.8828)]}, {'cls_id': 14, 'label': 'image', 'score': 0.9871416091918945, 'coordinate': [np.float32(775.50574), np.float32(200.66502), np.float32(1503.3807), np.float32(684.9304)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9801855087280273, 'coordinate': [np.float32(9.532196), np.float32(344.90594), np.float32(361.4413), np.float32(440.8244)]}, {'cls_id': 17, 'label': 'paragraph_title', 'score': 0.9708921313285828, 'coordinate': [np.float32(28.040405), np.float32(455.87976), np.float32(341.7215), np.float32(520.7117)]}, {'cls_id': 24, 'label': 'vision_footnote', 'score': 0.9002962708473206, 'coordinate': [np.float32(809.0692), np.float32(703.70044), np.float32(1488.3016), np.float32(750.5238)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9825374484062195, 'coordinate': [np.float32(8.896561), np.float32(536.54895), np.float32(361.05237), np.float32(655.8058)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9822263717651367, 'coordinate': [np.float32(8.971573), np.float32(657.4949), np.float32(362.01715), np.float32(774.625)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9767460823059082, 'coordinate': [np.float32(9.407074), np.float32(776.5216), np.float32(361.31067), np.float32(846.82874)]}, {'cls_id': 
22, 'label': 'text', 'score': 0.9868153929710388, 'coordinate': [np.float32(8.669495), np.float32(848.2543), np.float32(361.64703), np.float32(1062.8568)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9826608300209045, 'coordinate': [np.float32(8.8025055), np.float32(1063.8615), np.float32(361.46588), np.float32(1182.8524)]}, {'cls_id': 22, 'label': 'text', 'score': 0.982555627822876, 'coordinate': [np.float32(8.820602), np.float32(1184.4663), np.float32(361.66394), np.float32(1302.4507)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9584776759147644, 'coordinate': [np.float32(9.170288), np.float32(1304.2161), np.float32(361.48898), np.float32(1351.7483)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9782056212425232, 'coordinate': [np.float32(389.1618), np.float32(200.38202), np.float32(742.7591), np.float32(295.65146)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9844875931739807, 'coordinate': [np.float32(388.73303), np.float32(297.18463), np.float32(744.00024), np.float32(441.3034)]}, {'cls_id': 17, 'label': 'paragraph_title', 'score': 0.9680547714233398, 'coordinate': [np.float32(409.39468), np.float32(455.89386), np.float32(721.7174), np.float32(520.9387)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9741666913032532, 'coordinate': [np.float32(389.71606), np.float32(536.8138), np.float32(742.7112), np.float32(608.00165)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9840384721755981, 'coordinate': [np.float32(389.30988), np.float32(609.39636), np.float32(743.09247), np.float32(750.3231)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9845995306968689, 'coordinate': [np.float32(389.13272), np.float32(751.7772), np.float32(743.058), np.float32(894.8815)]}, {'cls_id': 22, 'label': 'text', 'score': 0.984852135181427, 'coordinate': [np.float32(388.83267), np.float32(896.0371), np.float32(743.58215), np.float32(1038.7345)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9804865717887878, 'coordinate': [np.float32(389.08478), np.float32(1039.9119), np.float32(742.7585), 
np.float32(1134.4897)]}, {'cls_id': 22, 'label': 'text', 'score': 0.986461341381073, 'coordinate': [np.float32(388.52643), np.float32(1135.8137), np.float32(743.451), np.float32(1352.0085)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9869391918182373, 'coordinate': [np.float32(769.8341), np.float32(775.66235), np.float32(1124.9813), np.float32(1063.207)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9822869896888733, 'coordinate': [np.float32(770.30383), np.float32(1063.938), np.float32(1124.8295), np.float32(1184.2192)]}, {'cls_id': 17, 'label': 'paragraph_title', 'score': 0.9689218997955322, 'coordinate': [np.float32(791.3042), np.float32(1199.3169), np.float32(1104.4521), np.float32(1264.6985)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9713128209114075, 'coordinate': [np.float32(770.4253), np.float32(1279.6072), np.float32(1124.6917), np.float32(1351.8672)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9236552119255066, 'coordinate': [np.float32(1153.9058), np.float32(775.5814), np.float32(1334.0654), np.float32(798.1581)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9857938885688782, 'coordinate': [np.float32(1151.5197), np.float32(799.28015), np.float32(1506.3619), np.float32(991.1156)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9820687174797058, 'coordinate': [np.float32(1151.5686), np.float32(991.91095), np.float32(1506.6023), np.float32(1110.8875)]}, {'cls_id': 22, 'label': 'text', 'score': 0.9866049885749817, 'coordinate': [np.float32(1151.6919), np.float32(1112.1301), np.float32(1507.1611), np.float32(1351.9504)]}]}}}
 
-运行结果参数说明可以参考[2.2.2 Python脚本方式集成](#222-python脚本方式集成)中的结果解释。 +运行结果参数说明可以参考[2.2 Python脚本方式集成](#22-python脚本方式集成)中的结果解释。 -注:由于产线的默认模型较大,推理速度可能较慢,您可以参考第一节的模型列表,替换推理速度更快的模型。 +注:由于 PaddleOCR-VL 的默认模型较大,推理速度可能较慢,建议实际推理使用[3. 使用推理加速框架提升 VLM 推理性能](#3-使用推理加速框架提升-vlm-推理性能) 方式进行快速推理。 ### 2.2 Python脚本方式集成 -几行代码即可完成产线的快速推理: + +命令行方式是为了快速体验查看效果,一般来说,在项目中,往往需要通过代码集成,您可以通过几行代码即可完成 PaddleOCR-VL 的快速推理,推理代码如下: ```python from paddlex import create_pipeline pipeline = create_pipeline(pipeline="PaddleOCR-VL") -output = pipeline.predict( - input="./paddleocr_vl_demo.png", - use_doc_orientation_classify=False, - use_doc_unwarping=False, -) +output = pipeline.predict(input="./pp_ocr_vl_demo.png") + for res in output: res.print() ## 打印预测的结构化输出 res.save_to_json(save_path="output") ## 保存当前图像的结构化json结果 @@ -404,51 +387,222 @@ for item in markdown_images: **注:** -- 在示例代码中,`use_doc_orientation_classify`、`use_doc_unwarping` 参数默认均设置为 `False`,分别表示关闭文档方向分类、文档扭曲矫正功能,如果需要使用这些功能,可以手动设置为 `True`。 +- 在示例代码中,`use_doc_orientation_classify`、`use_doc_unwarping` 参数默认均设置为 `False`,分别表示关闭文档方向分类、文本图像矫正功能,如果需要使用这些功能,可以手动设置为 `True`。 在上述 Python 脚本中,执行了如下几个步骤: -
(1)实例化 create_pipeline 实例化产线对象,具体参数说明如下: + +
(1)实例化对象,具体参数说明如下: - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + - - - + + + - + + + + + + + + + + + + + + + + + + + - + - - - - - + + + + - - - - + + + + + + + + + + + + + + + @@ -456,7 +610,7 @@ for item in markdown_images: -
(2)调用版面解析产线对象的 predict() 方法进行推理预测。该方法将返回一个 generator。以下是 predict() 方法的参数及其说明: +
(2)调用 PaddleOCR-VL 对象的 predict() 方法进行推理预测,该方法会返回一个结果列表。另外,PaddleOCR-VL 还提供了 predict_iter() 方法。两者在参数接受和结果返回方面是完全一致的,区别在于 predict_iter() 返回的是一个 generator,能够逐步处理和获取预测结果,适合处理大型数据集或希望节省内存的场景。可以根据实际需求选择使用这两种方法中的任意一种。以下是 predict() 方法的参数及其说明:
参数参数说明参数类型默认值参数参数说明参数类型默认值
layout_detection_model_name版面区域检测排序模型名称。如果设置为None,将会使用默认模型。str|NoneNone
layout_detection_model_dir版面区域检测排序模型的目录路径。如果设置为None,将会下载官方模型。str|NoneNone
layout_threshold版面模型得分阈值。 +
    +
  • float0-1 之间的任意浮点数;
  • +
  • dict{0:0.1} key为类别ID,value为该类别的阈值;
  • +
  • None:如果设置为None,将使用初始化的默认值。
  • +
+
float|dict|NoneNone
layout_nms版面检测是否使用后处理NMS。如果设置为None,将使用初始化的默认值。bool|NoneNone
layout_unclip_ratio版面区域检测模型检测框的扩张系数。 +
    +
  • float:任意大于 0 浮点数;
  • +
  • Tuple[float,float]:在横纵两个方向各自的扩张系数;
  • +
  • dict,dict的key为int类型,代表cls_id, value为tuple类型,如{0: (1.1, 2.0)},表示将模型输出的第0类别检测框中心不变,宽度扩张1.1倍,高度扩张2.0倍;
  • +
  • None:如果设置为None,将使用初始化的默认值。
  • +
+
float|Tuple[float,float]|dict|NoneNone
layout_merge_bboxes_mode版面区域检测的重叠框过滤方式。 +
    +
  • strlargesmallunion,分别表示重叠框过滤时选择保留大框,小框还是同时保留;
  • +
  • dict: dict的key为int类型,代表cls_id,value为str类型,如{0: "large", 2: "small"},表示对第0类别检测框使用large模式,对第2类别检测框使用small模式;
  • +
  • None:如果设置为None,将使用初始化的默认值。
  • +
+
str|dict|NoneNone
vl_rec_model_name多模态识别模型名称。如果设置为None,将会使用默认模型。str|NoneNone
vl_rec_model_dir多模态识别模型目录路径。如果设置为None,将会下载官方模型。str|NoneNone
vl_rec_backend多模态识别模型使用的推理后端。int|NoneNone
vl_rec_server_url如果多模态识别模型使用推理服务,该参数用于指定服务器URL。str|NoneNone
vl_rec_max_concurrency如果多模态识别模型使用推理服务,该参数用于指定最大并发请求数。str|NoneNone
doc_orientation_classify_model_name文档方向分类模型的名称。如果设置为None,将会使用默认模型。str|NoneNone
doc_orientation_classify_model_dir文档方向分类模型的目录路径。如果设置为None,将会下载官方模型。str|NoneNone
doc_unwarping_model_name文本图像矫正模型的名称。如果设置为None,将会使用默认模型。str|NoneNone
doc_unwarping_model_dir文本图像矫正模型的目录路径。如果设置为None,将会下载官方模型。str|NoneNone
use_doc_orientation_classify是否加载并使用文档方向分类模块。如果设置为None,将使用初始化的默认值,默认初始化为Falsebool|NoneNone
use_doc_unwarping是否加载并使用文本图像矫正模块。如果设置为None,将使用初始化的默认值,默认初始化为Falsebool|NoneNone
use_layout_detection是否加载并使用版面区域检测排序模块。如果设置为None,将使用初始化的默认值,默认初始化为Truebool|NoneNone
pipeline产线名称或是产线配置文件路径。如为产线名称,则必须为 PaddleX 所支持的产线。struse_chart_recognition是否加载并使用图表解析模块。如果设置为None,将使用初始化的默认值,默认初始化为Falsebool|None None
config产线配置文件路径。strformat_block_content控制是否将 block_content 中的内容格式化为Markdown格式。如果设置为None,将使用初始化的默认值,默认初始化为Falsebool|None None
device产线推理设备。支持指定GPU具体卡号,如“gpu:0”,其他硬件具体卡号,如“npu:0”,CPU如“cpu”。支持同时指定多个设备以进行并行推理,详情请参考产线并行推理文档。用于推理的设备。支持指定具体卡号: +
    +
  • CPU:如 cpu 表示使用 CPU 进行推理;
  • +
  • GPU:如 gpu:0 表示使用第 1 块 GPU 进行推理;
  • +
  • NPU:如 npu:0 表示使用第 1 块 NPU 进行推理;
  • +
  • XPU:如 xpu:0 表示使用第 1 块 XPU 进行推理;
  • +
  • MLU:如 mlu:0 表示使用第 1 块 MLU 进行推理;
  • +
  • DCU:如 dcu:0 表示使用第 1 块 DCU 进行推理;
  • +
  • None:如果设置为None,初始化时,会优先使用本地的 GPU 0号设备,如果没有,则使用 CPU 设备。
  • +
+
str|NoneNone
enable_hpi是否启用高性能推理。boolFalse
use_tensorrt是否启用 Paddle Inference 的 TensorRT 子图引擎。如果模型不支持通过 TensorRT 加速,即使设置了此标志,也不会使用加速。
+对于 CUDA 11.8 版本的飞桨,兼容的 TensorRT 版本为 8.x(x>=6),建议安装 TensorRT 8.6.1.6。
+
boolFalse
precision计算精度,如 fp32、fp16。 strgpu:0"fp32"
use_hpip是否启用高性能推理插件。如果为 None,则使用配置文件或 config 中的配置。bool | NoneNoneenable_mkldnn是否启用 MKL-DNN 加速推理。如果 MKL-DNN 不可用或模型不支持通过 MKL-DNN 加速,即使设置了此标志,也不会使用加速。 +boolTrue
hpi_config高性能推理配置dict | Nonemkldnn_cache_capacity +MKL-DNN 缓存容量。 +int10
cpu_threads在 CPU 上进行推理时使用的线程数。int8
paddlex_configPaddleX产线配置文件路径。str|None None
@@ -464,220 +618,118 @@ for item in markdown_images: - - - - - + + - + - - + - - + - - + - - + - - + - - + - - + - - - + - - + - + - + - - + - - + - - + - - + -
参数 参数说明 参数类型可选项 默认值
input待预测数据,支持多种输入类型,必填Python Var|str|list +待预测数据,支持多种输入类型,必填。
  • Python Var:如 numpy.ndarray 表示的图像数据
  • str:如图像文件或者PDF文件的本地路径:/root/data/img.jpg如URL链接,如图像文件或PDF文件的网络URL:示例如本地目录,该目录下需包含待预测图像,如本地路径:/root/data/(当前不支持目录中包含PDF文件的预测,PDF文件需要指定到具体文件路径)
  • -
  • List:列表元素需为上述类型数据,如[numpy.ndarray, numpy.ndarray][\"/root/data/img1.jpg\", \"/root/data/img2.jpg\"][\"/root/data1\", \"/root/data2\"]
  • +
  • list:列表元素需为上述类型数据,如[numpy.ndarray, numpy.ndarray]["/root/data/img1.jpg", "/root/data/img2.jpg"]["/root/data1", "/root/data2"]。
NonePython Var|str|list
use_doc_orientation_classify是否使用文档方向分类模块是否在推理时使用文档方向分类模块。设置为None表示使用实例化参数,否则该参数优先级更高。 bool|None -
    -
  • boolTrue 或者 False
  • -
  • None:如果设置为None, 将默认使用产线初始化的该参数值,初始化为True
  • -
-
None
use_doc_unwarping是否使用文档扭曲矫正模块是否在推理时使用文本图像矫正模块。设置为None表示使用实例化参数,否则该参数优先级更高。 bool|None -
    -
  • boolTrue 或者 False
  • -
  • None:如果设置为None, 将默认使用产线初始化的该参数值,初始化为True
  • -
-
None
use_layout_detection是否使用版面检测模块是否在推理时使用版面区域检测排序模块。设置为None表示使用实例化参数,否则该参数优先级更高。 bool|None -
    -
  • boolTrue 或者 False
  • -
  • None:如果设置为None, 将默认使用产线初始化的该参数值,初始化为True
  • -
-
None
use_chart_recognition是否使用图表识别功能是否在推理时使用图表解析模块。设置为None表示使用实例化参数,否则该参数优先级更高。 bool|None -
    -
  • boolTrue 或者 False
  • -
  • None:如果设置为None, 将默认使用产线初始化的该参数值,初始化为False
  • -
-
None
layout_threshold版面模型得分阈值参数含义与实例化参数基本相同。设置为None表示使用实例化参数,否则该参数优先级更高。 float|dict|None -
    -
  • float0-1 之间的任意浮点数;
  • -
  • dict{0:0.1} key为类别ID,value为该类别的阈值;
  • -
  • None:如果设置为 None, 将默认使用产线初始化的该参数值,初始化为 0.5
  • -
-
None
layout_nms版面区域检测模型是否使用NMS后处理参数含义与实例化参数基本相同。设置为None表示使用实例化参数,否则该参数优先级更高。 bool|None -
    -
  • boolTrue 或者 False
  • -
  • None:如果设置为None, 将默认使用产线初始化的该参数值,初始化为True
  • -
-
None
layout_unclip_ratio版面区域检测模型检测框的扩张系数参数含义与实例化参数基本相同。设置为None表示使用实例化参数,否则该参数优先级更高。 float|Tuple[float,float]|dict|None -
    -
  • float:任意大于 0 浮点数;
  • -
  • Tuple[float,float]:在横纵两个方向各自的扩张系数;
  • -
  • 字典, 字典的key为int类型,代表cls_id, value为tuple类型,如{0: (1.1, 2.0)}, 表示将模型输出的第0类别检测框中心不变,宽度扩张1.1倍,高度扩张2.0倍
  • -
  • None:如果设置为 None, 将默认使用产线初始化的该参数值,初始化为 1.0
  • -
-
None
layout_merge_bboxes_mode版面区域检测的重叠框过滤方式参数含义与实例化参数基本相同。设置为None表示使用实例化参数,否则该参数优先级更高。 str|dict|None -
    -
  • strlargesmall, union,分别表示重叠框过滤时选择保留大框,小框还是同时保留
  • -
  • dict, 字典的key为int类型,代表cls_id, value为str类型, 如{0: "large", 2: "small"}, 表示对第0类别检测框使用large模式,对第2类别检测框使用small模式
  • -
  • None:如果设置为 None, 将默认使用产线初始化的该参数值,初始化为 large
  • -
-
None
use_queues 用于控制是否启用内部队列。当设置为 True 时,数据加载(如将 PDF 页面渲染为图像)、版面检测模型处理以及 VLM 推理将分别在独立线程中异步执行,通过队列传递数据,从而提升效率。对于页数较多的 PDF 文档,或是包含大量图像或 PDF 文件的目录,这种方式尤其高效。 bool|None -
    -
  • boolTrue 或者 False
  • -
  • None:如果设置为None, 将默认使用产线初始化的该参数值,初始化为True
  • -
-
None
prompt_labelVL模型的prompt类型设置,当且仅当 use_layout_detection=Fasle 时生效VL模型的 prompt 类型设置,当且仅当 use_layout_detection=False 时生效。可填写参数为 ocrformulatablechart str|None -
    -
  • strocrformulatable 或者 chart
  • -
  • None:如果设置为None, 将默认使用产线初始化的该参数值,初始化为ocr
  • -
-
None
format_block_content控制是否将 block_content 中的内容格式化为Markdown格式参数含义与实例化参数基本相同。设置为None表示使用实例化参数,否则该参数优先级更高。 bool|None -
    -
  • boolTrue 或者 False
  • -
  • None:如果设置为None, 将默认使用产线初始化的该参数值,初始化为False
  • -
-
None
repetition_penaltyVL模型采样使用的重复惩罚参数VL模型采样使用的重复惩罚参数。 float|None -
    -
  • float:任意大于等于0的浮点数;
  • -
  • None:如果设置为None,将使用默认值;
  • -
-
None
temperatureVL模型采样使用的温度参数VL模型采样使用的温度参数。 float|None -
    -
  • float:任意大于等于0的浮点数;
  • -
  • None:如果设置为None,将使用默认值;
  • -
-
None
top_pVL模型采样使用的top-p参数VL模型采样使用的top-p参数。 float|None -
    -
  • float:取值范围在(0, 1]的浮点数;
  • -
  • None:如果设置为None,将使用默认值;
  • -
-
None
min_pixelsVL模型预处理图像时允许的最小像素数VL模型预处理图像时允许的最小像素数。 int|None -
    -
  • int:任意大于0的整数;
  • -
  • None:如果设置为None,将使用默认值;
  • -
-
None
max_pixelsVL模型预处理图像时允许的最大像素数VL模型预处理图像时允许的最大像素数。 int|None -
    -
  • int:任意大于0的整数;
  • -
  • None:如果设置为None,将使用默认值;
  • -
-
None
-
(3)对预测结果进行处理:每个样本的预测结果均为对应的Result对象,且支持打印、保存为图片、保存为json文件的操作: @@ -698,19 +750,19 @@ for item in markdown_images: 打印结果到终端 format_json bool -是否对输出内容进行使用 JSON 缩进格式化 +是否对输出内容进行使用 JSON 缩进格式化。 True indent int -指定缩进级别,以美化输出的 JSON 数据,使其更具可读性,仅当 format_jsonTrue 时有效 +指定缩进级别,以美化输出的 JSON 数据,使其更具可读性,仅当 format_jsonTrue 时有效。 4 ensure_ascii bool -控制是否将非 ASCII 字符转义为 Unicode。设置为 True 时,所有非 ASCII 字符将被转义;False 则保留原始字符,仅当format_jsonTrue时有效 +控制是否将非 ASCII 字符转义为 Unicode。设置为 True 时,所有非 ASCII 字符将被转义;False 则保留原始字符,仅当format_jsonTrue时有效。 False @@ -718,19 +770,19 @@ for item in markdown_images: 将结果保存为json格式的文件 save_path str -保存的文件路径,当为目录时,保存文件命名与输入文件类型命名一致 +保存的文件路径,当为目录时,保存文件命名与输入文件类型命名一致。 无 indent int -指定缩进级别,以美化输出的 JSON 数据,使其更具可读性,仅当 format_jsonTrue 时有效 +指定缩进级别,以美化输出的 JSON 数据,使其更具可读性,仅当 format_jsonTrue 时有效。 4 ensure_ascii bool -控制是否将非 ASCII 字符转义为 Unicode。设置为 True 时,所有非 ASCII 字符将被转义;False 则保留原始字符,仅当format_jsonTrue时有效 +控制是否将非 ASCII 字符转义为 Unicode。设置为 True 时,所有非 ASCII 字符将被转义;False 则保留原始字符,仅当format_jsonTrue时有效。 False @@ -738,23 +790,36 @@ for item in markdown_images: 将中间各个模块的可视化图像保存在png格式的图像 save_path str -保存的文件路径,支持目录或文件路径 +保存的文件路径,支持目录或文件路径。 无 -save_to_markdown() -将图像或者PDF文件中的每一页分别保存为markdown格式的文件 +save_to_markdown() +将图像或者PDF文件中的每一页分别保存为markdown格式的文件 save_path str -保存的文件路径,支持目录或文件路径 +保存的文件路径,当为目录时,保存文件命名与输入文件类型命名一致 无 +pretty +bool +是否美化 markdown 输出结果,将图表等进行居中操作,使 markdown 渲染后更美观。 +True + + +show_formula_number +bool +控制是否在 markdown 中将保留公式编号。设置为 True 时,保留全部公式编号;False 则仅保留公式 +False + + + save_to_html() 将文件中的表格保存为html格式的文件 save_path str -保存的文件路径,支持目录或文件路径 +保存的文件路径,支持目录或文件路径。 无 @@ -762,17 +827,9 @@ for item in markdown_images: 将文件中的表格保存为xlsx格式的文件 save_path str -保存的文件路径,支持目录或文件路径 +保存的文件路径,支持目录或文件路径。 无 - -concatenate_markdown_pages() -将多页Markdown内容拼接为单一文档 -markdown_list -list -包含每一页Markdown数据的列表 -返回处理后的Markdown文本和图像列表 - - 调用`print()` 方法会将结果打印到终端,打印到终端的内容解释如下: @@ -780,17 +837,17 @@ for item in markdown_images: - `page_index`: `(Union[int, None])` 如果输入是PDF文件,则表示当前是PDF的第几页,否则为 `None` - - 
`model_settings`: `(Dict[str, bool])` 配置产线所需的模型参数 + - `model_settings`: `(Dict[str, bool])` 配置 PaddleOCR-VL 所需的模型参数 - `use_doc_preprocessor`: `(bool)` 控制是否启用文档预处理子产线 - `use_layout_detection`: `(bool)` 控制是否启用版面检测模块 - - `use_chart_recognition`: `(bool)` 控制是否启用图表识别功能 - - `format_block_content`: `(bool)` 控制是否将 `block_content` 中的内容格式化为Markdown格式 + - `use_chart_recognition`: `(bool)` 控制是否开启图表识别功能 + - `format_block_content`: `(bool)` 控制是否在`JSON`中保存格式化后的markdown内容 - - `doc_preprocessor_res`: `(Dict[str, Union[List[float], str]])` 文档预处理结果字典,仅当`use_doc_preprocessor=True`时存在 - - `input_path`: `(str)` 文档预处理子产线接受的图像路径,当输入为`numpy.ndarray`时,保存为`None`,此处为`None` + - `doc_preprocessor_res`: `(Dict[str, Union[List[float], str]])` 文档预处理结果dict,仅当`use_doc_preprocessor=True`时存在 + - `input_path`: `(str)` 文档预处理子接受的图像路径,当输入为`numpy.ndarray`时,保存为`None`,此处为`None` - `page_index`: `None`,此处的输入为`numpy.ndarray`,所以值为`None` - - `model_settings`: `(Dict[str, bool])` 文档预处理子产线的模型配置参数 + - `model_settings`: `(Dict[str, bool])` 文档预处理子的模型配置参数 - `use_doc_orientation_classify`: `(bool)` 控制是否启用文档图像方向分类子模块 - `use_doc_unwarping`: `(bool)` 控制是否启用文本图像扭曲矫正子模块 - `angle`: `(int)` 文档图像方向分类子模块的预测结果,启用时返回实际角度值 @@ -802,15 +859,34 @@ for item in markdown_images: - `block_id`: `(int)` 版面区域的索引,用于显示版面排序结果。 - `block_order` `(int)` 版面区域的顺序,用于显示版面阅读顺序,对于非排序部分,默认值为 `None`。 - - `formula_res_list`: `(List[Dict[str, Union[numpy.ndarray, List[float], str]]])` 公式识别结果列表,每个元素为一个字典 - - `rec_formula`: `(str)` 公式识别结果 - - `rec_polys`: `(numpy.ndarray)` 公式检测框,shape为(4, 2),dtype为int16 - - `formula_region_id`: `(int)` 公式所在的区域编号 +- 调用`save_to_json()` 方法会将上述内容保存到指定的 `save_path` 中,如果指定为目录,则保存的路径为`save_path/{your_img_basename}_res.json`,如果指定为文件,则直接保存到该文件中。由于 json 文件不支持保存numpy数组,因此会将其中的 `numpy.array` 类型转换为列表形式。json中的字段内容如下: + - `input_path`: `(str)` 待预测图像或者PDF的输入路径 + + - `page_index`: `(Union[int, None])` 如果输入是PDF文件,则表示当前是PDF的第几页,否则为 `None` + + - `model_settings`: `(Dict[str, bool])` 配置 PaddleOCR-VL 所需的模型参数 + + - `use_doc_preprocessor`: `(bool)` 
控制是否启用文档预处理子产线 + - `use_layout_detection`: `(bool)` 控制是否启用版面检测模块 + - `use_chart_recognition`: `(bool)` 控制是否开启图表识别功能 + - `format_block_content`: `(bool)` 控制是否在`JSON`中保存格式化后的markdown内容 + + - `doc_preprocessor_res`: `(Dict[str, Union[List[float], str]])` 文档预处理结果dict,仅当`use_doc_preprocessor=True`时存在 + - `input_path`: `(str)` 文档预处理子接受的图像路径,当输入为`numpy.ndarray`时,保存为`None`,此处为`None` + - `page_index`: `None`,此处的输入为`numpy.ndarray`,所以值为`None` + - `model_settings`: `(Dict[str, bool])` 文档预处理子的模型配置参数 + - `use_doc_orientation_classify`: `(bool)` 控制是否启用文档图像方向分类子模块 + - `use_doc_unwarping`: `(bool)` 控制是否启用文本图像扭曲矫正子模块 + - `angle`: `(int)` 文档图像方向分类子模块的预测结果,启用时返回实际角度值 -- 调用`save_to_json()` 方法会将上述内容保存到指定的 `save_path` 中,如果指定为目录,则保存的路径为`save_path/{your_img_basename}_res.json`,如果指定为文件,则直接保存到该文件中。由于 json 文件不支持保存numpy数组,因此会将其中的 `numpy.array` 类型转换为列表形式。 -- 调用`save_to_img()` 方法会将可视化结果保存到指定的 `save_path` 中,如果指定为目录,则会将版面区域检测可视化图像、版面阅读顺序可视化图像等内容保存,如果指定为文件,则直接保存到该文件中。(产线通常包含较多结果图片,不建议直接指定为具体的文件路径,否则多张图会被覆盖,仅保留最后一张图) -- 调用`save_to_markdown()` 方法会将转化后的 Markdown 文件保存到指定的 `save_path` 中,保存的文件路径为`save_path/{your_img_basename}.md`,如果输入是 PDF 文件,建议直接指定目录,否责多个 markdown 文件会被覆盖,该方法另外支持传入 `pretty` 参数用于控制是否将图片、表格等美化为居中展示。传入 `show_formula_number` 参数用于控制是否将公式编号展示在 markdown 结果文件中。 -- 调用 `concatenate_markdown_pages()` 方法将 `PaddleOCR-VL pipeline` 输出的多页Markdown内容`markdown_list`合并为单个完整文档,并返回合并后的Markdown内容。 + - `parsing_res_list`: `(List[Dict])` 解析结果的列表,每个元素为一个字典,列表顺序为解析后的阅读顺序。 + - `block_bbox`: `(np.ndarray)` 版面区域的边界框。 + - `block_label`: `(str)` 版面区域的标签,例如`text`, `table`等。 + - `block_content`: `(str)` 内容为版面区域内的内容。 + - `block_id`: `(int)` 版面区域的索引,用于显示版面排序结果。 + - `block_order` `(int)` 版面区域的顺序,用于显示版面阅读顺序,对于非排序部分,默认值为 `None`。 +- 调用`save_to_img()` 方法会将可视化结果保存到指定的 `save_path` 中,如果指定为目录,则会将版面区域检测可视化图像、全局OCR可视化图像、版面阅读顺序可视化图像等内容保存,如果指定为文件,则直接保存到该文件中。 +- 调用`save_to_markdown()` 方法会将转化后的 Markdown 文件保存到指定的 `save_path` 中,保存的文件路径为`save_path/{your_img_basename}.md`,如果输入是 PDF 文件,建议直接指定目录,否责多个 markdown 文件会被覆盖。 
此外,也支持通过属性获取带结果的可视化图像和预测结果,具体如下: @@ -842,35 +918,12 @@ for item in markdown_images:
-- `json` 属性获取的预测结果为字典类型的数据,相关内容与调用 `save_to_json()` 方法保存的内容一致。 -- `img` 属性返回的预测结果是一个字典类型的数据。其中,键分别为 `layout_det_res`、`overall_ocr_res`、`text_paragraphs_ocr_res`、`formula_res_region1`、`table_cell_img` 和 `seal_res_region1`,对应的值是 `Image.Image` 对象:分别用于显示版面区域检测、OCR、OCR文本段落、公式、表格和印章结果的可视化图像。如果没有使用可选模块,则字典中只包含 `layout_det_res`。 -- `markdown` 属性返回的预测结果是一个字典类型的数据。其中,键分别为 `markdown_texts` 、 `markdown_images`和`page_continuation_flags`,对应的值分别是 markdown 文本,在 Markdown 中显示的图像(`Image.Image` 对象)和用于标识当前页面第一个元素是否为段开始以及最后一个元素是否为段结束的bool元组。 +- `json` 属性获取的预测结果为dict类型的数据,相关内容与调用 `save_to_json()` 方法保存的内容一致。 +- `img` 属性返回的预测结果是一个dict类型的数据。其中,键分别为 `layout_det_res` 和 `layout_order_res`,对应的值是 `Image.Image` 对象:分别用于显示版面区域检测和版面阅读顺序结果的可视化图像。如果没有使用可选模块,则dict中只包含 `layout_det_res`。 +- `markdown` 属性返回的预测结果是一个dict类型的数据。其中,键分别为 `markdown_texts` 和`markdown_images`,对应的值分别是 markdown 文本,在 Markdown 中显示的图像(`Image.Image` 对象)。
-此外,您可以获取版面解析产线配置文件,并加载配置文件进行预测。可执行如下命令将结果保存在 `my_path` 中: -``` -paddlex --get_pipeline_config PaddleOCR-VL --save_path ./my_path -``` -若您获取了配置文件,即可对版面解析产线各项配置进行自定义,只需要修改 `create_pipeline` 方法中的 `pipeline` 参数值为产线配置文件路径即可。示例如下: - -```python -from paddlex import create_pipeline - -pipeline = create_pipeline(pipeline="./my_path/PaddleOCR-VL.yaml") - -output = pipeline.predict( - input="./paddleocr_vl_demo.png", - use_doc_orientation_classify=False, - use_doc_unwarping=False, - use_textline_orientation=False, -) -for res in output: - res.print() ## 打印预测的结构化输出 - res.save_to_json(save_path="output") ## 保存当前图像的结构化json结果 - res.save_to_markdown(save_path="output") ## 保存当前图像的markdown格式的结果 -``` -注: 配置文件中的参数为产线初始化参数,如果希望更改通用版面解析v3产线初始化参数,可以直接修改配置文件中的参数,并加载配置文件进行预测。同时,CLI 预测也支持传入配置文件,`--pipeline` 指定配置文件的路径即可。 ## 3. 使用推理加速框架提升 VLM 推理性能 @@ -883,11 +936,15 @@ for res in output: #### 3.1.1 使用 Docker 镜像 -PaddleX 提供了 Docker 镜像,用于快速启动 vLLM 推理服务。可使用以下命令启动服务: +PaddleX 针对不同推理加速框架提供了相应的 Docker 镜像,用于快速启动 VLM 推理服务: + +* **vLLM**:`ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server` +* **SGLang**:`ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-sglang-server` + +以 vLLM 为例,可使用以下命令启动服务: ```bash docker run \ - -it \ --rm \ --gpus all \ --network host \ @@ -900,7 +957,6 @@ docker run \ ```bash docker run \ - -it \ --rm \ --gpus all \ --network host \ @@ -908,9 +964,9 @@ docker run \ paddlex_genai_server --model_name PaddleOCR-VL-0.9B --host 0.0.0.0 --port 8118 --backend vllm ``` -若您使用的是 NVIDIA 50 系显卡 (Compute Capacity >= 12),需要在启动服务前安装指定版本的 FlashAttention: +若您使用的是 NVIDIA 50 系显卡 (Compute Capability >= 12),需要在启动服务前安装指定版本的 FlashAttention: -``` +```bash docker run \ -it \ --rm \ @@ -932,16 +988,16 @@ python -m venv .venv # 激活环境 source .venv/bin/activate # 安装 PaddleX -python -m pip install paddlex +python -m pip install "paddlex[ocr]" # 安装 vLLM 服务器插件 paddlex --install genai-vllm-server # 安装 SGLang 服务器插件 # paddlex --install genai-sglang-server ``` -若您使用的是 NVIDIA 
50 系显卡 (Compute Capacity >= 12),需要在启动服务前安装指定版本的 FlashAttention: +若您使用的是 NVIDIA 50 系显卡 (Compute Capability >= 12),需要在启动服务前安装指定版本的 FlashAttention: -``` +```bash python -m pip install flash-attn==2.8.3 ``` @@ -982,8 +1038,8 @@ paddlex --get_pipeline_config PaddleOCR-VL VLRecognition: ... genai_config: - backend: vllm-server - server_url: http://127.0.0.1:8118/v1 + backend: vllm + server_url: http://127.0.0.1:8118 ``` 之后,可以使用修改好的配置文件进行产线调用。例如通过 CLI 调用: @@ -1050,16 +1106,78 @@ PaddleX 会将来自单张或多张输入图像中的子图分组并对服务器 - **服务端** - vLLM:`gpu-memory-utilization=0.8` -## 4. 开发集成/部署 -如果产线可以达到您对产线推理速度和精度的要求,您可以直接进行开发集成/部署。 -若您需要将产线直接应用在您的Python项目中,可以参考 [2.2 Python脚本方式](#22-python脚本方式集成)中的示例代码。 +## 4. 服务化部署 + +若您需要将 PaddleOCR-VL 直接应用在您的Python项目中,可以参考 [2.2 Python脚本方式](#22-python脚本方式集成)中的示例代码。 -此外,PaddleX 也提供了其他三种部署方式,详细说明如下: +此外,PaddleX 也提供了服务化部署方式,详细说明如下: -🚀 高性能推理:在实际生产环境中,许多应用对部署策略的性能指标(尤其是响应速度)有着较严苛的标准,以确保系统的高效运行与用户体验的流畅性。为此,PaddleX 提供高性能推理插件,旨在对模型推理及前后处理进行深度性能优化,实现端到端流程的显著提速,详细的高性能推理流程请参考[PaddleX高性能推理指南](../../../pipeline_deploy/high_performance_inference.md)。 +### 4.1 安装依赖 + +执行如下命令,通过 PaddleX CLI 安装 PaddleX 服务化部署插件: + +```bash +paddlex --install serving +``` + +### 4.2 运行服务器 + +通过 PaddleX CLI 运行服务器: + +```bash +paddlex --serve --pipeline PaddleOCR-VL +``` + +可以看到类似以下展示的信息: + +```text +INFO: Started server process [63108] +INFO: Waiting for application startup. +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit) +``` + +如需调整配置(如模型路径、batch size、部署设备等),可指定 `--pipeline` 为自定义配置文件。 + +与服务化部署相关的命令行选项如下: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
名称说明
--pipelinePaddleX 产线注册名或产线配置文件路径。
--device产线部署设备。默认情况下,当 GPU 可用时,将使用 GPU;否则使用 CPU。
--host服务器绑定的主机名或 IP 地址。默认为 0.0.0.0
--port服务器监听的端口号。默认为 8080
--use_hpip如果指定,则使用高性能推理。请参考高性能推理文档了解更多信息。
--hpi_config高性能推理配置。请参考高性能推理文档了解更多信息。
-☁️ 服务化部署:服务化部署是实际生产环境中常见的一种部署形式。通过将推理功能封装为服务,客户端可以通过网络请求来访问这些服务,以获取推理结果。PaddleX 支持多种产线服务化部署方案,详细的产线服务化部署流程请参考[PaddleX服务化部署指南](../../../pipeline_deploy/serving.md)。 +### 4.3 客户端调用 以下是基础服务化部署的API参考与多语言服务调用示例: @@ -1175,85 +1293,85 @@ PaddleX 会将来自单张或多张输入图像中的子图分组并对服务器 useDocUnwarping boolean | null -请参阅产线对象中 predict 方法的 use_doc_unwarping 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 use_doc_unwarping 参数相关说明。 否 useLayoutDetection boolean | null -请参阅产线对象中 predict 方法的 use_layout_detection 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 use_layout_detection 参数相关说明。 否 useChartRecognition boolean | null -请参阅产线对象中 predict 方法的 use_chart_recognition 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 use_chart_recognition 参数相关说明。 否 layoutThreshold number | object |
null -请参阅产线对象中 predict 方法的 layout_threshold 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 layout_threshold 参数相关说明。 否 layoutNms boolean | null -请参阅产线对象中 predict 方法的 layout_nms 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 layout_nms 参数相关说明。 否 layoutUnclipRatio number | array | object | null -请参阅产线对象中 predict 方法的 layout_unclip_ratio 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 layout_unclip_ratio 参数相关说明。 否 layoutMergeBboxesMode string | object | null -请参阅产线对象中 predict 方法的 layout_merge_bboxes_mode 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 layout_merge_bboxes_mode 参数相关说明。 否 promptLabel string | object | null -请参阅产线对象中 predict 方法的 prompt_label 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 prompt_label 参数相关说明。 否 formatBlockContent boolean | null -请参阅产线对象中 predict 方法的 format_block_content 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 format_block_content 参数相关说明。 否 repetitionPenalty number | null -请参阅产线对象中 predict 方法的 repetition_penalty 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 repetition_penalty 参数相关说明。 否 temperature number | null -请参阅产线对象中 predict 方法的 temperature 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 temperature 参数相关说明。 否 topP number | null -请参阅产线对象中 predict 方法的 top_p 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 top_p 参数相关说明。 否 minPixels number | null -请参阅产线对象中 predict 方法的 min_pixels 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 min_pixels 参数相关说明。 否 maxPixels number | null -请参阅产线对象中 predict 方法的 max_pixels 参数相关说明。 +请参阅PaddleOCR-VL对象中 predict 方法的 max_pixels 参数相关说明。 否 @@ -1275,9 +1393,9 @@ PaddleX 会将来自单张或多张输入图像中的子图分组并对服务器
  • 传入 true:返回图像。
  • 传入 false:不返回图像。
  • -
  • 若请求体中未提供该参数或传入 null:遵循产线配置文件Serving.visualize 的设置。
  • +
  • 若请求体中未提供该参数或传入 null:遵循配置文件Serving.visualize 的设置。
-
例如,在产线配置文件中添加如下字段:
+
例如,在配置文件中添加如下字段:
Serving:
   visualize: False
 
@@ -1324,7 +1442,7 @@ PaddleX 会将来自单张或多张输入图像中的子图分组并对服务器 prunedResult object -产线对象的 predict 方法生成结果的 JSON 表示中 res 字段的简化版本,其中去除了 input_pathpage_index 字段。 +对象的 predict 方法生成结果的 JSON 表示中 res 字段的简化版本,其中去除了 input_pathpage_index 字段。 markdown @@ -1334,7 +1452,7 @@ PaddleX 会将来自单张或多张输入图像中的子图分组并对服务器 outputImages object | null -参见产线预测结果的 img 属性说明。图像为JPEG格式,使用Base64编码。 +参见预测结果的 img 属性说明。图像为JPEG格式,使用Base64编码。 inputImage @@ -1900,132 +2018,3 @@ foreach ($result as $i => $item) {

- -📱 端侧部署:端侧部署是一种将计算和数据处理功能放在用户设备本身上的方式,设备可以直接处理数据,而不需要依赖远程的服务器。PaddleX 支持将模型部署在 Android 等端侧设备上,详细的端侧部署流程请参考[PaddleX端侧部署指南](../../../pipeline_deploy/on_device_deployment.md)。 -您可以根据需要选择合适的方式部署模型产线,进而进行后续的 AI 应用集成。 - -## 5. 二次开发 -如果通用版面解析v3产线提供的默认模型权重在您的场景中,精度或速度不满意,您可以尝试利用您自己拥有的特定领域或应用场景的数据对现有模型进行进一步的微调,以提升通用版面解析v3产线的在您的场景中的识别效果。 - -### 5.1 模型微调 - -由于通用版面解析v3产线包含若干模块,模型产线的效果不及预期可能来自于其中任何一个模块。您可以对提取效果差的 case 进行分析,通过可视化图像,确定是哪个模块存在问题,并参考以下表格中对应的微调教程链接进行模型微调。 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
情形微调模块微调参考链接
版面区域检测不准,如印章、表格未检出等版面区域检测模块链接
表格结构识别不准表格结构识别模块链接
公式识别不准公式识别模块链接
印章文本存在漏检印章文本检测模块链接
文本存在漏检文本检测模块链接
文本内容都不准文本识别模块链接
垂直或者旋转文本行矫正不准文本行方向分类模块链接
整图旋转矫正不准文档图像方向分类模块链接
图像扭曲矫正不准文本图像矫正模块暂不支持微调
- -### 5.2 模型应用 -当您使用私有数据集完成微调训练后,可获得本地模型权重文件。 - -若您需要使用微调后的模型权重,只需对产线配置文件做修改,将微调后模型权重的本地路径替换至产线配置文件中的对应位置即可: - -```yaml -...... -SubModules: - LayoutDetection: - module_name: layout_detection - model_name: PP-DocLayout_plus-L - model_dir: null # 替换为微调后的版面区域检测模型权重路径 -...... -SubPipelines: - GeneralOCR: - pipeline_name: OCR - text_type: general - use_doc_preprocessor: False - use_textline_orientation: False - SubModules: - TextDetection: - module_name: text_detection - model_name: PP-OCRv5_server_det - model_dir: null # 替换为微调后的文本测模型权重路径 - limit_side_len: 960 - limit_type: max - max_side_limit: 4000 - thresh: 0.3 - box_thresh: 0.6 - unclip_ratio: 1.5 - - TextRecognition: - module_name: text_recognition - model_name: PP-OCRv5_server_rec - model_dir: null # 替换为微调后的文本识别模型权重路径 - batch_size: 1 - score_thresh: 0 -...... -``` -随后, 参考本地体验中的命令行方式或 Python 脚本方式,加载修改后的产线配置文件即可。 - -## 6. 多硬件支持 -PaddleX 支持英伟达 GPU、昆仑芯 XPU、昇腾 NPU和寒武纪 MLU 等多种主流硬件设备,仅需修改 `--device`参数即可完成不同硬件之间的无缝切换。 - -例如,您使用昇腾 NPU 进行版面解析产线的推理,使用的 CLI 命令为: - -```bash -paddlex --pipeline PP-StructureV3 \ - --input pp_structure_v3_demo.png \ - --use_doc_orientation_classify False \ - --use_doc_unwarping False \ - --use_textline_orientation False \ - --use_e2e_wireless_table_rec_model True \ - --save_path ./output \ - --device npu:0 -``` - -当然,您也可以在 Python 脚本中 `create_pipeline()` 时或者 `predict()` 时指定硬件设备。 - -若您想在更多种类的硬件上使用通用版面解析v3产线,请参考[PaddleX多硬件使用指南](../../../other_devices_support/multi_devices_use_guide.md)。 From 61932c3462b1ae19de1162c5009037ed38dd8cb1 Mon Sep 17 00:00:00 2001 From: zhouchangda Date: Fri, 24 Oct 2025 09:00:17 +0000 Subject: [PATCH 28/49] compatible with python3.9 --- paddlex/inference/pipelines/paddleocr_vl/uilts.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddlex/inference/pipelines/paddleocr_vl/uilts.py b/paddlex/inference/pipelines/paddleocr_vl/uilts.py index 0f214b94ee..a3a8a9e3a2 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/uilts.py +++ 
b/paddlex/inference/pipelines/paddleocr_vl/uilts.py @@ -18,7 +18,7 @@ import re from collections import Counter from copy import deepcopy -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Tuple, Union import numpy as np from PIL import Image @@ -829,7 +829,7 @@ def convert_otsl_to_html(otsl_content: str): return export_to_html(table_data) -def find_shortest_repeating_substring(s: str) -> str | None: +def find_shortest_repeating_substring(s: str) -> Union[str, None]: """ Find the shortest substring that repeats to form the entire string. @@ -850,7 +850,7 @@ def find_shortest_repeating_substring(s: str) -> str | None: def find_repeating_suffix( s: str, min_len: int = 8, min_repeats: int = 5 -) -> Tuple[str, str, int] | None: +) -> Union[Tuple[str, str, int], None]: """ Detect if string ends with a repeating phrase. @@ -888,7 +888,7 @@ def truncate_repetitive_content( min_len (int): Min length for char-level check. Returns: - Tuple[str, str]: (truncated_content, info_string) + Union[str, str]: (truncated_content, info_string) """ stripped_content = content.strip() if not stripped_content: From eaa32c14d4a719b5f2cc94b5af94f841b88e4827 Mon Sep 17 00:00:00 2001 From: zhouchangda Date: Mon, 27 Oct 2025 04:00:23 +0000 Subject: [PATCH 29/49] support print parsing_res_list --- paddlex/inference/pipelines/paddleocr_vl/result.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/paddlex/inference/pipelines/paddleocr_vl/result.py b/paddlex/inference/pipelines/paddleocr_vl/result.py index cef785acce..69f1762559 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/result.py +++ b/paddlex/inference/pipelines/paddleocr_vl/result.py @@ -324,6 +324,16 @@ def _to_str(self, *args, **kwargs) -> dict[str, str]: data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"] if self["model_settings"]["use_layout_detection"]: data["layout_det_res"] = self["layout_det_res"].str["res"] + parsing_res_list = self["parsing_res_list"] + 
parsing_res_list = [ + { + "block_label": parsing_res.label, + "block_content": parsing_res.content, + "block_bbox": parsing_res.bbox, + } + for parsing_res in parsing_res_list + ] + data["parsing_res_list"] = parsing_res_list return JsonMixin._to_str(data, *args, **kwargs) def _to_json(self, *args, **kwargs) -> dict[str, str]: From 9579f20886f80b795bb75ad48097b3bebae744d3 Mon Sep 17 00:00:00 2001 From: zhang-prog <69562787+zhang-prog@users.noreply.github.com> Date: Mon, 27 Oct 2025 19:21:45 +0800 Subject: [PATCH 30/49] update for new chat_template (#4672) * update for new chat_template * adapt to the old chat_template * fix --- .../common/tokenizer/tokenizer_utils.py | 26 +++++++++++++++---- .../processors/paddleocr_vl/_paddleocr_vl.py | 5 +++- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/paddlex/inference/models/common/tokenizer/tokenizer_utils.py b/paddlex/inference/models/common/tokenizer/tokenizer_utils.py index 2da3a88ea8..a34f3380bb 100644 --- a/paddlex/inference/models/common/tokenizer/tokenizer_utils.py +++ b/paddlex/inference/models/common/tokenizer/tokenizer_utils.py @@ -713,11 +713,27 @@ def _apply_chat_template( "apply_chat_template do not support applying batch conversations, " "so you should apply the conversation one by one." 
) - query = self.chat_template.render( - messages=conversations, - **self.special_tokens_map, - add_generation_prompt=add_generation_prompt, - ) + try: + query = self.chat_template.render( + messages=conversations, + **self.special_tokens_map, + add_generation_prompt=add_generation_prompt, + ) + except TypeError: + for i in range(len(conversations)): + content = conversations[i]["content"] + if isinstance(content, list): + new_content = "" + for part in content: + if part.get("type") == "text": + new_content = part["text"] + break + conversations[i]["content"] = new_content + query = self.chat_template.render( + messages=conversations, + **self.special_tokens_map, + add_generation_prompt=add_generation_prompt, + ) return query def encode_chat_inputs( diff --git a/paddlex/inference/models/doc_vlm/processors/paddleocr_vl/_paddleocr_vl.py b/paddlex/inference/models/doc_vlm/processors/paddleocr_vl/_paddleocr_vl.py index 9b4dd9857b..ae99330e49 100644 --- a/paddlex/inference/models/doc_vlm/processors/paddleocr_vl/_paddleocr_vl.py +++ b/paddlex/inference/models/doc_vlm/processors/paddleocr_vl/_paddleocr_vl.py @@ -82,7 +82,10 @@ def preprocess( messages = [ { "role": "user", - "content": input_dict["query"], + "content": [ + {"type": "image", "image": "placeholder"}, # placeholder + {"type": "text", "text": input_dict["query"]}, + ], } ] prompt = self.tokenizer.apply_chat_template(messages, tokenize=False) From 0af6510a6ed0b050fb149c332f0c87f59eb27955 Mon Sep 17 00:00:00 2001 From: changdazhou <142379845+changdazhou@users.noreply.github.com> Date: Tue, 28 Oct 2025 11:03:28 +0800 Subject: [PATCH 31/49] [cherry-pick]mv crop formula from gen_ai_client to pipeline (#4679) * update docs * compatible with python3.9 * support print parsing_res_list * mv crop formula from gen_ai_client to pipeline --- paddlex/inference/models/doc_vlm/predictor.py | 43 ------------------- .../pipelines/paddleocr_vl/pipeline.py | 2 + .../inference/pipelines/paddleocr_vl/uilts.py | 32 ++++++++++++++ 
3 files changed, 34 insertions(+), 43 deletions(-) diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py index 1eec92b734..d876d73899 100644 --- a/paddlex/inference/models/doc_vlm/predictor.py +++ b/paddlex/inference/models/doc_vlm/predictor.py @@ -370,46 +370,6 @@ def _switch_inputs_to_device(self, input_dict): } return rst_dict - def crop_margin(self, img): # 输入是OpenCV图像 (numpy数组) - import cv2 - - # 如果输入是彩色图像,转换为灰度图 - if len(img.shape) == 3: - gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - else: - gray = img.copy() - - # 转换为0-255范围(确保是uint8类型) - if gray.dtype != np.uint8: - gray = gray.astype(np.uint8) - - max_val = gray.max() - min_val = gray.min() - - if max_val == min_val: - return img - - # 归一化并二值化(与PIL版本逻辑一致) - data = (gray - min_val) / (max_val - min_val) * 255 - data = data.astype(np.uint8) - - # 创建二值图像(暗色区域为白色,亮色区域为黑色) - _, binary = cv2.threshold(data, 200, 255, cv2.THRESH_BINARY_INV) - - # 查找非零像素坐标 - coords = cv2.findNonZero(binary) - - if coords is None: # 如果没有找到任何内容,返回原图 - return img - - # 获取边界框 - x, y, w, h = cv2.boundingRect(coords) - - # 裁剪图像 - cropped = img[y : y + h, x : x + w] - - return cropped - def _genai_client_process( self, data, @@ -425,9 +385,6 @@ def _genai_client_process( def _process(item): image = item["image"] - prompt = item["query"] - if prompt == "Formula Recognition:": - image = self.crop_margin(image) if isinstance(image, str): if image.startswith("http://") or image.startswith("https://"): image_url = image diff --git a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py index b4fb4903ff..10d6e2b296 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py +++ b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py @@ -35,6 +35,7 @@ from .result import PaddleOCRVLBlock, PaddleOCRVLResult from .uilts import ( convert_otsl_to_html, + crop_margin, filter_overlap_boxes, merge_blocks, tokenize_figure_of_table, @@ 
-243,6 +244,7 @@ def get_layout_parsing_results( text_prompt = "Chart Recognition:" elif "formula" in block_label and block_label != "formula_number": text_prompt = "Formula Recognition:" + block_img = crop_margin(block_img) block_imgs.append(block_img) text_prompts.append(text_prompt) figure_token_maps.append(figure_token_map) diff --git a/paddlex/inference/pipelines/paddleocr_vl/uilts.py b/paddlex/inference/pipelines/paddleocr_vl/uilts.py index a3a8a9e3a2..8cd650fd89 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/uilts.py +++ b/paddlex/inference/pipelines/paddleocr_vl/uilts.py @@ -923,3 +923,35 @@ def truncate_repetitive_content( return most_common_line return content + + +def crop_margin(img): + import cv2 + + if len(img.shape) == 3: + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + else: + gray = img.copy() + + if gray.dtype != np.uint8: + gray = gray.astype(np.uint8) + + max_val = gray.max() + min_val = gray.min() + + if max_val == min_val: + return img + + data = (gray - min_val) / (max_val - min_val) * 255 + data = data.astype(np.uint8) + + _, binary = cv2.threshold(data, 200, 255, cv2.THRESH_BINARY_INV) + coords = cv2.findNonZero(binary) + + if coords is None: + return img + + x, y, w, h = cv2.boundingRect(coords) + cropped = img[y : y + h, x : x + w] + + return cropped From e0c509eef1b333e3a57545b04a47f7f701fadfb1 Mon Sep 17 00:00:00 2001 From: Tingquan Gao <35441050@qq.com> Date: Tue, 28 Oct 2025 10:56:59 +0800 Subject: [PATCH 32/49] use model cache files when network is unavailable (#4676) --- paddlex/inference/models/__init__.py | 3 - paddlex/inference/utils/official_models.py | 67 +++++++++++----------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/paddlex/inference/models/__init__.py b/paddlex/inference/models/__init__.py index 40c5818aea..449edaeb95 100644 --- a/paddlex/inference/models/__init__.py +++ b/paddlex/inference/models/__init__.py @@ -70,9 +70,6 @@ def create_predictor( if need_local_model(genai_config): if model_dir 
is None: - assert ( - model_name in official_models - ), f"The model ({model_name}) is not supported! Please using directory of local model files or model name supported by PaddleX!" model_dir = official_models[model_name] else: assert Path(model_dir).exists(), f"{model_dir} is not exists!" diff --git a/paddlex/inference/utils/official_models.py b/paddlex/inference/utils/official_models.py index aacd80e247..98447af01d 100644 --- a/paddlex/inference/utils/official_models.py +++ b/paddlex/inference/utils/official_models.py @@ -45,7 +45,7 @@ "ResNet152", "ResNet152_vd", "ResNet200_vd", - "PaddleOCR-VL-0.9B", + "PaddleOCR-VL", "PP-LCNet_x0_25", "PP-LCNet_x0_25_textline_ori", "PP-LCNet_x0_35", @@ -345,7 +345,7 @@ "en_PP-OCRv5_mobile_rec", "th_PP-OCRv5_mobile_rec", "el_PP-OCRv5_mobile_rec", - "PaddleOCR-VL-0.9B", + "PaddleOCR-VL", "PicoDet_layout_1x", "PicoDet_layout_1x_table", "PicoDet-L_layout_17cls", @@ -419,27 +419,15 @@ def get_model(self, model_name): assert ( model_name in self.model_list ), f"The model {model_name} is not supported on hosting {self.__class__.__name__}!" - if model_name == "PaddleOCR-VL-0.9B": - model_name = "PaddleOCR-VL" model_dir = self._save_dir / f"{model_name}" - if os.path.exists(model_dir): - logging.info( - f"Model files already exist. Using cached files. To redownload, please delete the directory manually: `{model_dir}`." - ) - else: - logging.info( - f"Using official model ({model_name}), the model files will be automatically downloaded and saved in `{model_dir}`." - ) - self._download(model_name, model_dir) - logging.debug( - f"`{model_name}` model files has been download from model source: `{self.alias}`!" - ) - - if model_name == "PaddleOCR-VL": - vl_model_dir = model_dir / "PaddleOCR-VL-0.9B" - if vl_model_dir.exists() and vl_model_dir.is_dir(): - return vl_model_dir + logging.info( + f"Using official model ({model_name}), the model files will be automatically downloaded and saved in `{model_dir}`." 
+ ) + self._download(model_name, model_dir) + logging.debug( + f"`{model_name}` model files has been download from model source: `{self.alias}`!" + ) return model_dir @@ -573,21 +561,33 @@ def _build_hosters(self): hosters.append(hoster_cls(self._save_dir)) if len(hosters) == 0: logging.warning( - f"""No model hoster is available! Please check your network connection to one of the following model hosts: -HuggingFace ({_HuggingFaceModelHoster.healthcheck_url}), -ModelScope ({_ModelScopeModelHoster.healthcheck_url}), -AIStudio ({_AIStudioModelHoster.healthcheck_url}), or -BOS ({_BosModelHoster.healthcheck_url}). -Otherwise, only local models can be used.""" + f"No model hoster is available! Please check your network connection to one of the following model hosts: HuggingFace ({_HuggingFaceModelHoster.healthcheck_url}), ModelScope ({_ModelScopeModelHoster.healthcheck_url}), AIStudio ({_AIStudioModelHoster.healthcheck_url}), or BOS ({_BosModelHoster.healthcheck_url}). Otherwise, only local models can be used." ) return hosters def _get_model_local_path(self, model_name): - if len(self._hosters) == 0: - msg = "No available model hosting platforms detected. Please check your network connection." - logging.error(msg) - raise Exception(msg) - return self._download_from_hoster(self._hosters, model_name) + if model_name == "PaddleOCR-VL-0.9B": + model_name = "PaddleOCR-VL" + + model_dir = self._save_dir / f"{model_name}" + if os.path.exists(model_dir): + logging.info( + f"Model files already exist. Using cached files. To redownload, please delete the directory manually: `{model_dir}`." + ) + else: + if len(self._hosters) == 0: + msg = "No available model hosting platforms detected. Please check your network connection." 
+ logging.error(msg) + raise Exception(msg) + + model_dir = self._download_from_hoster(self._hosters, model_name) + + if model_name == "PaddleOCR-VL": + vl_model_dir = model_dir / "PaddleOCR-VL-0.9B" + if vl_model_dir.exists() and vl_model_dir.is_dir(): + return vl_model_dir + + return model_dir def _download_from_hoster(self, hosters, model_name): for idx, hoster in enumerate(hosters): @@ -605,6 +605,9 @@ def _download_from_hoster(self, hosters, model_name): f"Encountering exception when download model from {hoster.alias}: \n{e}, will try to download from other model sources: `{hosters[idx + 1].alias}`." ) return self._download_from_hoster(hosters[idx + 1 :], model_name) + raise Exception( + f"No model source is available for model `{model_name}`! Please check model name and network, or use local model files!" + ) def __contains__(self, model_name): return model_name in self.model_list From 1da53a1aa48f57cf3e81b15358b1d9a01778580c Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Tue, 28 Oct 2025 21:21:48 +0800 Subject: [PATCH 33/49] [Feat] Add genai-vllm-server Dockerfile and build script (#4680) * Add genai-vllm-server Dockerfile and build script * Add lower bound for paddlex version * Set paddleocr lower version to 3.3.5 --- deploy/genai_vllm_server_docker/Dockerfile | 18 ++++++++++++++++++ deploy/genai_vllm_server_docker/build.sh | 9 +++++++++ 2 files changed, 27 insertions(+) create mode 100644 deploy/genai_vllm_server_docker/Dockerfile create mode 100755 deploy/genai_vllm_server_docker/build.sh diff --git a/deploy/genai_vllm_server_docker/Dockerfile b/deploy/genai_vllm_server_docker/Dockerfile new file mode 100644 index 0000000000..8dfa80d716 --- /dev/null +++ b/deploy/genai_vllm_server_docker/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.10 + +RUN apt-get update \ + && apt-get install -y libgl1 \ + && rm -rf /var/lib/apt/lists/* + +ENV PIP_NO_CACHE_DIR=0 +ENV PYTHONUNBUFFERED=1 +ENV PYTHONDONTWRITEBYTECODE=1 + +RUN python -m pip install 'paddlex>=3.3.5,<3.4' + 
+RUN python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl \ + && paddlex --install genai-vllm-server + +EXPOSE 8080 + +CMD ["paddlex_genai_server", "--model_name", "PaddleOCR-VL-0.9B", "--host", "0.0.0.0", "--port", "8080", "--backend", "vllm"] diff --git a/deploy/genai_vllm_server_docker/build.sh b/deploy/genai_vllm_server_docker/build.sh new file mode 100755 index 0000000000..27a82b3591 --- /dev/null +++ b/deploy/genai_vllm_server_docker/build.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +docker build \ + -t "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server:${1:latest}" \ + --build-arg http_proxy="${http_proxy}" \ + --build-arg https_proxy="${https_proxy}" \ + --build-arg no_proxy="${no_proxy}" \ + --build-arg PIP_INDEX_URL="${PIP_INDEX_URL}" \ + . From 802629c2f7efd500d782139943714974edfa92b9 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Tue, 28 Oct 2025 13:23:57 +0000 Subject: [PATCH 34/49] Bump version to 3.3.6 --- paddlex/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/.version b/paddlex/.version index fa7adc7ac7..9c25013dbb 100644 --- a/paddlex/.version +++ b/paddlex/.version @@ -1 +1 @@ -3.3.5 +3.3.6 From 2348ac094f3212b5e3dc8ebfacf9c49de90cc4ac Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Wed, 5 Nov 2025 14:32:02 +0000 Subject: [PATCH 35/49] Bump version to 3.3.7 --- paddlex/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/.version b/paddlex/.version index 9c25013dbb..86fb650440 100644 --- a/paddlex/.version +++ b/paddlex/.version @@ -1 +1 @@ -3.3.6 +3.3.7 From 56078fe4a7dca61135e4ce2ac0b0ec8941cd1385 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Wed, 5 Nov 2025 22:31:01 +0800 Subject: [PATCH 36/49] Fix bugs (#4707) --- deploy/genai_vllm_server_docker/Dockerfile | 5 +++-- paddlex/paddlex_cli.py | 9 +-------- 2 files changed, 4 insertions(+), 
10 deletions(-) diff --git a/deploy/genai_vllm_server_docker/Dockerfile b/deploy/genai_vllm_server_docker/Dockerfile index efad42ebe0..c8fd98297e 100644 --- a/deploy/genai_vllm_server_docker/Dockerfile +++ b/deploy/genai_vllm_server_docker/Dockerfile @@ -13,9 +13,10 @@ RUN python -m pip install "paddlex${PADDLEX_VERSION}" ARG BUILD_FOR_SM120=false RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \ - python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl \ + python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \ else \ - python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl \ + python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \ + fi \ && paddlex --install genai-vllm-server EXPOSE 8080 diff --git a/paddlex/paddlex_cli.py b/paddlex/paddlex_cli.py index e80c5e4e22..3d9bebda02 100644 --- a/paddlex/paddlex_cli.py +++ b/paddlex/paddlex_cli.py @@ -36,11 +36,7 @@ is_dep_available, is_paddle2onnx_plugin_available, ) -from .utils.env import ( - get_gpu_compute_capability, - get_paddle_cuda_version, - is_cuda_available, -) +from .utils.env import get_gpu_compute_capability, get_paddle_cuda_version from .utils.install import install_packages, uninstall_packages from .utils.interactive_get_pipeline import interactive_get_pipeline from .utils.pipeline_arguments import PIPELINE_ARGUMENTS @@ -324,9 +320,6 @@ def _install_hpi_deps(device_type): ) def _install_genai_deps(plugin_types): - if not is_cuda_available(): - sys.exit("Currently, only GPU devices are supported.") - fd_plugin_types = [] not_fd_plugin_types = [] for 
plugin_type in plugin_types: From ddacf071bbdbfe10ca29befc6a8a4f289fb1d133 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Wed, 5 Nov 2025 14:32:31 +0000 Subject: [PATCH 37/49] Bump version to 3.3.8 --- paddlex/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/.version b/paddlex/.version index 86fb650440..37d02a6e38 100644 --- a/paddlex/.version +++ b/paddlex/.version @@ -1 +1 @@ -3.3.7 +3.3.8 From f89f8c752e7fe24f46fcee40bc7481de1f94954d Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 6 Nov 2025 00:55:29 +0800 Subject: [PATCH 38/49] Fix bugs (#4708) * Fix bugs * Optimize --- deploy/genai_vllm_server_docker/Dockerfile | 4 +-- deploy/genai_vllm_server_docker/build.sh | 4 +-- paddlex/paddlex_cli.py | 39 +++++++++++++--------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/deploy/genai_vllm_server_docker/Dockerfile b/deploy/genai_vllm_server_docker/Dockerfile index c8fd98297e..1c7b222274 100644 --- a/deploy/genai_vllm_server_docker/Dockerfile +++ b/deploy/genai_vllm_server_docker/Dockerfile @@ -13,9 +13,9 @@ RUN python -m pip install "paddlex${PADDLEX_VERSION}" ARG BUILD_FOR_SM120=false RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \ - python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \ + python -m pip install torch==2.8.0 https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \ else \ - python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \ + python -m pip install torch==2.8.0 https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \ fi \ && paddlex --install genai-vllm-server diff --git 
a/deploy/genai_vllm_server_docker/build.sh b/deploy/genai_vllm_server_docker/build.sh index 74019f5c13..f0c8773a27 100755 --- a/deploy/genai_vllm_server_docker/build.sh +++ b/deploy/genai_vllm_server_docker/build.sh @@ -21,8 +21,8 @@ while [[ $# -gt 0 ]]; do shift ;; *) - echo "Unknown option: $1" - exit 1 + echo "Unknown option: $1" >&2 + exit 2 ;; esac done diff --git a/paddlex/paddlex_cli.py b/paddlex/paddlex_cli.py index 3d9bebda02..78222f32d3 100644 --- a/paddlex/paddlex_cli.py +++ b/paddlex/paddlex_cli.py @@ -36,7 +36,11 @@ is_dep_available, is_paddle2onnx_plugin_available, ) -from .utils.env import get_gpu_compute_capability, get_paddle_cuda_version +from .utils.env import ( + get_gpu_compute_capability, + get_paddle_cuda_version, + is_cuda_available, +) from .utils.install import install_packages, uninstall_packages from .utils.interactive_get_pipeline import interactive_get_pipeline from .utils.pipeline_arguments import PIPELINE_ARGUMENTS @@ -365,21 +369,24 @@ def _install_genai_deps(plugin_types): for plugin_type in plugin_types: if "vllm" in plugin_type or "sglang" in plugin_type: - try: - install_packages(["wheel"], constraints="required") - cap = get_gpu_compute_capability() - if cap >= (12, 0): - install_packages( - ["xformers", "flash-attn == 2.8.3"], constraints="required" - ) - else: - install_packages( - ["xformers", "flash-attn == 2.8.2"], constraints="required" - ) - except Exception: - logging.error("Installation failed", exc_info=True) - sys.exit(1) - break + install_packages(["xformers"], constraints="required") + if is_cuda_available(): + try: + install_packages(["wheel"], constraints="required") + cap = get_gpu_compute_capability() + assert cap is not None + if cap >= (12, 0): + install_packages( + ["flash-attn == 2.8.3"], constraints="required" + ) + else: + install_packages( + ["flash-attn == 2.8.2"], constraints="required" + ) + except Exception: + logging.error("Installation failed", exc_info=True) + sys.exit(1) + break logging.info( 
"Successfully installed the generative AI plugin" From 8cb743430ad453555a3f7086bec28177a1cc6351 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 6 Nov 2025 01:44:21 +0800 Subject: [PATCH 39/49] Fix bug (#4709) --- deploy/genai_vllm_server_docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/genai_vllm_server_docker/Dockerfile b/deploy/genai_vllm_server_docker/Dockerfile index 1c7b222274..8b8ef0d361 100644 --- a/deploy/genai_vllm_server_docker/Dockerfile +++ b/deploy/genai_vllm_server_docker/Dockerfile @@ -13,7 +13,7 @@ RUN python -m pip install "paddlex${PADDLEX_VERSION}" ARG BUILD_FOR_SM120=false RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \ - python -m pip install torch==2.8.0 https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \ + python -m pip install torch==2.8.0 https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.4.11/flash_attn-2.8.3%2Bcu128torch2.8-cp310-cp310-linux_x86_64.whl; \ else \ python -m pip install torch==2.8.0 https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \ fi \ From a88b267b1f2d01cbc46372cf57f1133f03589d2f Mon Sep 17 00:00:00 2001 From: gaotingquan Date: Mon, 10 Nov 2025 06:45:58 +0000 Subject: [PATCH 40/49] disable mkldnn by default for PP-DocLayoutV2 --- paddlex/inference/utils/mkldnn_blocklist.py | 1 + 1 file changed, 1 insertion(+) diff --git a/paddlex/inference/utils/mkldnn_blocklist.py b/paddlex/inference/utils/mkldnn_blocklist.py index 972a98f961..b62fb0e145 100644 --- a/paddlex/inference/utils/mkldnn_blocklist.py +++ b/paddlex/inference/utils/mkldnn_blocklist.py @@ -56,4 +56,5 @@ "PP-FormulaNet_plus-L", "PP-FormulaNet_plus-M", "PP-FormulaNet_plus-S", + "PP-DocLayoutV2", ] From 54baddbc6721942ec4d096fe0b25738129903e21 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Mon, 10 
Nov 2025 20:31:02 +0800 Subject: [PATCH 41/49] [Feat] Support vLLM deployment on DCUs (#4710) * Support vLLM deployment on DCUs * Fix * Fix DCU check --- paddlex/inference/genai/backends/vllm.py | 11 +++++++++++ paddlex/utils/env.py | 10 ++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/paddlex/inference/genai/backends/vllm.py b/paddlex/inference/genai/backends/vllm.py index 35a1b77eaa..9b21d860bd 100644 --- a/paddlex/inference/genai/backends/vllm.py +++ b/paddlex/inference/genai/backends/vllm.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from ....utils import logging from ....utils.deps import is_genai_engine_plugin_available, require_genai_engine_plugin from ..configs.utils import ( backend_config_to_args, @@ -61,6 +62,16 @@ def run_vllm_server(host, port, model_name, model_dir, config, chat_template_pat }, ) + import torch + + if torch.version.hip is not None and torch.version.cuda is None: + # For DCU + if "api-server-count" in config: + logging.warning( + "Key 'api-server-count' will be popped as it is not supported" + ) + config.pop("api-server-count") + args = backend_config_to_args(config) args = parser.parse_args(args) validate_parsed_serve_args(args) diff --git a/paddlex/utils/env.py b/paddlex/utils/env.py index 1991d0ec51..f42b0130c5 100644 --- a/paddlex/utils/env.py +++ b/paddlex/utils/env.py @@ -65,13 +65,18 @@ def is_cuda_available(): import paddle.device # TODO: Check runtime availability - return paddle.device.is_compiled_with_cuda() + return ( + paddle.device.is_compiled_with_cuda() and not paddle.is_compiled_with_rocm() + ) else: # If Paddle is unavailable, check GPU availability using PyTorch API. 
require_deps("torch") + import torch.cuda + import torch.version - return torch.cuda.is_available() + # Distinguish GPUs and DCUs by checking `torch.version.cuda` + return torch.cuda.is_available() and torch.version.cuda def get_gpu_compute_capability(): @@ -85,6 +90,7 @@ def get_gpu_compute_capability(): else: # If Paddle is unavailable, retrieve GPU compute capability from PyTorch instead. require_deps("torch") + import torch.cuda cap = torch.cuda.get_device_capability() From 24acf03578012834ee26c11bbfee04375f758132 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Mon, 10 Nov 2025 20:41:09 +0800 Subject: [PATCH 42/49] Bump FD version from 2.3.0rc0 to 2.3.0 (#4721) --- paddlex/utils/deps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/utils/deps.py b/paddlex/utils/deps.py index c1781308b8..cfb43eeea8 100644 --- a/paddlex/utils/deps.py +++ b/paddlex/utils/deps.py @@ -308,7 +308,7 @@ def get_genai_fastdeploy_spec(device_type): if device_type not in SUPPORTED_DEVICE_TYPES: raise ValueError(f"Unsupported device type: {device_type}") if device_type == "gpu": - return "fastdeploy-gpu == 2.3.0rc0" + return "fastdeploy-gpu == 2.3.0" else: raise AssertionError From 2526aadfdb6e09e8a7b1f3deacd0b4648c7e15ce Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Mon, 10 Nov 2025 12:43:03 +0000 Subject: [PATCH 43/49] Bump version to 3.3.9 --- paddlex/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/.version b/paddlex/.version index 37d02a6e38..3b47f2e4f8 100644 --- a/paddlex/.version +++ b/paddlex/.version @@ -1 +1 @@ -3.3.8 +3.3.9 From acab8aa60754e8f8e7fae8bf9303e5f5773fa125 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 13 Nov 2025 14:17:20 +0800 Subject: [PATCH 44/49] Replace naive eager attention with SDPA (#4725) --- .../doc_vlm/modeling/paddleocr_vl/_ernie.py | 9 ++- .../doc_vlm/modeling/paddleocr_vl/_siglip.py | 66 ++++++++++++------- paddlex/inference/models/doc_vlm/predictor.py | 9 ++- 
paddlex/inference/utils/misc.py | 11 ++++ 4 files changed, 68 insertions(+), 27 deletions(-) diff --git a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_ernie.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_ernie.py index 7e43b3cff0..2a653613df 100644 --- a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_ernie.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_ernie.py @@ -296,7 +296,7 @@ def forward(self, hidden_states): 3. Scale by learned weight parameter - Maintains original dtype for numerical stability during computation """ - if self.config.fuse_rms_norm: + if hidden_states.dtype != paddle.float16 and self.config.fuse_rms_norm: return fused_rms_norm_ext( hidden_states, self.weight, self.variance_epsilon )[0].astype(self.weight.dtype) @@ -854,8 +854,15 @@ def core_attn( v = tensor.transpose(x=v, perm=perm) replicate = self.config.num_attention_heads // self.config.num_key_value_heads + is_float16 = k.dtype == paddle.float16 + if is_float16: + k = k.cast(paddle.float32) + v = v.cast(paddle.float32) k = paddle.repeat_interleave(k, replicate, axis=1) v = paddle.repeat_interleave(v, replicate, axis=1) + if is_float16: + k = k.cast(paddle.float16) + v = v.cast(paddle.float16) scale_qk_coeff = self.config.scale_qk_coeff * self.head_dim**0.5 product = paddle.matmul(x=q.scale(1.0 / scale_qk_coeff), y=k, transpose_y=True) diff --git a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py index a4a3c4a0c1..0495a23b2b 100644 --- a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py @@ -100,15 +100,22 @@ def eager_attention_forward( dropout: float = 0.0, **kwargs, ): - attn_weights = paddle.matmul(query, key.transpose((0, 1, 3, 2))) * scaling + origin_dtype = query.dtype + + attn_weights = paddle.matmul(x=query.scale(scaling), y=key, transpose_y=True) + 
attn_weights = attn_weights.cast(paddle.float32)
+
     if attention_mask is not None:
+        attention_mask = attention_mask.cast(paddle.float32)
         attn_weights = attn_weights + attention_mask
 
-    attn_weights = F.softmax(attn_weights, axis=-1, dtype="float32").astype(query.dtype)
+    attn_weights = F.softmax(attn_weights, axis=-1)
+    attn_weights = attn_weights.cast(origin_dtype)
+
     attn_weights = F.dropout(attn_weights, p=dropout, training=module.training)
     attn_output = paddle.matmul(attn_weights, value)
-    attn_output = attn_output.transpose((0, 2, 1, 3)).contiguous()
+    attn_output = attn_output.transpose((0, 2, 1, 3))
 
     return attn_output, attn_weights
 
@@ -138,6 +145,9 @@ def forward(
         cu_seqlens: Optional[List[paddle.Tensor]] = None,
         rope_emb: Optional[Tuple[paddle.Tensor, paddle.Tensor]] = None,  # (cos, sin)
     ):
+        if output_attentions:
+            raise NotImplementedError
+
         B, L, D = hidden_states.shape
 
         q = self.q_proj(hidden_states)
@@ -145,7 +155,6 @@ def forward(
         v = self.v_proj(hidden_states)
 
         # [B, L, H, Dh]
-
         q = q.reshape([B, L, self.num_heads, self.head_dim])
         k = k.reshape([B, L, self.num_heads, self.head_dim])
         v = v.reshape([B, L, self.num_heads, self.head_dim])
@@ -153,29 +162,38 @@ def forward(
             cos, sin = rope_emb
             q, k = apply_rotary_pos_emb_vision(q, k, cos, sin)
 
-        # → [B, H, L, Dh]
-        q = q.transpose([0, 2, 1, 3])
-        k = k.transpose([0, 2, 1, 3])
-        v = v.transpose([0, 2, 1, 3])
-
-        attn_output, attn_weights = eager_attention_forward(
-            self,
-            q,
-            k,
-            v,
-            attention_mask,
-            is_causal=self.is_causal,
-            scaling=self.scale,
-            dropout=0.0 if not self.training else self.dropout,
-        )
-        attn_output = attn_output.reshape([B, L, D]).contiguous()
+        if q.dtype == paddle.float32:
+            # → [B, H, L, Dh]
+            q = q.transpose([0, 2, 1, 3])
+            k = k.transpose([0, 2, 1, 3])
+            v = v.transpose([0, 2, 1, 3])
+
+            attn_output, _ = eager_attention_forward(
+                self,
+                q,
+                k,
+                v,
+                attention_mask,
+                is_causal=self.is_causal,
+                scaling=self.scale,
+                dropout=0.0 if not self.training else self.dropout,
+            )
+            
attn_output = attn_output.reshape([B, L, D]) + else: + attn_output = paddle.nn.functional.scaled_dot_product_attention( + q, + k, + v, + attention_mask, + dropout_p=self.dropout, + is_causal=self.is_causal, + training=self.training, + ) + attn_output = attn_output.reshape([B, L, D]) attn_output = self.out_proj(attn_output) - if not output_attentions: - attn_weights = None - - return attn_output, attn_weights + return attn_output, None class SiglipVisionEmbeddings(nn.Layer): diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py index d2eb1d9a55..f0f3b0a61a 100644 --- a/paddlex/inference/models/doc_vlm/predictor.py +++ b/paddlex/inference/models/doc_vlm/predictor.py @@ -29,7 +29,7 @@ from ....utils.deps import require_genai_client_plugin from ....utils.device import TemporaryDeviceChanger from ...common.batch_sampler import DocVLMBatchSampler -from ...utils.misc import is_bfloat16_available +from ...utils.misc import is_bfloat16_available, is_float16_available from ..base import BasePredictor from .result import DocVLMResult @@ -54,7 +54,12 @@ def __init__(self, *args, **kwargs): if self._use_local_model: self.device = kwargs.get("device", None) - self.dtype = "bfloat16" if is_bfloat16_available(self.device) else "float32" + if is_bfloat16_available(self.device): + self.dtype = "bfloat16" + elif is_float16_available(self.device): + self.dtype = "float16" + else: + self.dtype = "float32" self.infer, self.processor = self._build(**kwargs) diff --git a/paddlex/inference/utils/misc.py b/paddlex/inference/utils/misc.py index bb6fe29ad2..28e8803604 100644 --- a/paddlex/inference/utils/misc.py +++ b/paddlex/inference/utils/misc.py @@ -32,3 +32,14 @@ def is_bfloat16_available(device): return ( "npu" in get_device_type() or paddle.amp.is_bfloat16_supported() ) and device_type in ("gpu", "npu", "xpu", "mlu") + + +def is_float16_available(device): + import paddle.amp + + if device is None: + device = get_default_device() + 
device_type, _ = parse_device(device) + return ( + "npu" in get_device_type() or paddle.amp.is_float16_supported() + ) and device_type in ("gpu", "npu", "xpu", "mlu", "dcu") From dc0075e244a44921828ecfc5500e8a5552849c7a Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Fri, 21 Nov 2025 17:09:54 +0800 Subject: [PATCH 45/49] HPI Supports paddle 3.2 (#4754) * HPI allow untested Paddle versions for safe mode and update paddle version in HPS * Update deps * Limit CC>=8.0 for SDPA --- deploy/hps/server_env/Dockerfile | 1 + deploy/hps/server_env/cpu_version.txt | 2 +- deploy/hps/server_env/gpu_version.txt | 2 +- deploy/hps/server_env/requirements/app.in | 1 + deploy/hps/server_env/requirements/cpu.in | 2 +- deploy/hps/server_env/requirements/cpu.txt | 13 ++++++++++--- deploy/hps/server_env/requirements/gpu.in | 2 +- deploy/hps/server_env/requirements/gpu.txt | 13 ++++++++++--- .../models/doc_vlm/modeling/paddleocr_vl/_siglip.py | 6 +++++- paddlex/inference/utils/hpi.py | 9 +++++++++ 10 files changed, 40 insertions(+), 11 deletions(-) diff --git a/deploy/hps/server_env/Dockerfile b/deploy/hps/server_env/Dockerfile index 1038a12d7f..a908b2095a 100644 --- a/deploy/hps/server_env/Dockerfile +++ b/deploy/hps/server_env/Dockerfile @@ -46,6 +46,7 @@ ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 ENV PIP_INDEX_URL=${PIP_INDEX_URL} +RUN python -m pip install pip==25.2 # Requirement collection FROM base AS rc diff --git a/deploy/hps/server_env/cpu_version.txt b/deploy/hps/server_env/cpu_version.txt index 940ac09aa6..5503126d59 100644 --- a/deploy/hps/server_env/cpu_version.txt +++ b/deploy/hps/server_env/cpu_version.txt @@ -1 +1 @@ -0.3.9 +0.3.10 diff --git a/deploy/hps/server_env/gpu_version.txt b/deploy/hps/server_env/gpu_version.txt index 5503126d59..208059121d 100644 --- a/deploy/hps/server_env/gpu_version.txt +++ b/deploy/hps/server_env/gpu_version.txt @@ -1 +1 @@ -0.3.10 +0.3.11 diff --git a/deploy/hps/server_env/requirements/app.in 
b/deploy/hps/server_env/requirements/app.in index 2cb9003a5f..a32c621604 100644 --- a/deploy/hps/server_env/requirements/app.in +++ b/deploy/hps/server_env/requirements/app.in @@ -4,4 +4,5 @@ numpy >= 1.24 opencv-contrib-python == 4.10.0.84 pycocotools >= 2 pydantic >= 2 +safetensors @ https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl typing-extensions >= 4.11 diff --git a/deploy/hps/server_env/requirements/cpu.in b/deploy/hps/server_env/requirements/cpu.in index b265f14dcd..328c4b54b8 100644 --- a/deploy/hps/server_env/requirements/cpu.in +++ b/deploy/hps/server_env/requirements/cpu.in @@ -1 +1 @@ -paddlepaddle @ https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.1.1-cp310-cp310-linux_x86_64.whl +paddlepaddle @ https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.2.1-cp310-cp310-linux_x86_64.whl diff --git a/deploy/hps/server_env/requirements/cpu.txt b/deploy/hps/server_env/requirements/cpu.txt index bbbeaab8e4..dad601aea9 100644 --- a/deploy/hps/server_env/requirements/cpu.txt +++ b/deploy/hps/server_env/requirements/cpu.txt @@ -171,6 +171,7 @@ lxml==5.3.1 # via # paddlex (../../../setup.py) # premailer + # python-docx markupsafe==3.0.2 # via jinja2 marshmallow==3.26.1 @@ -238,7 +239,7 @@ packaging==24.2 # matplotlib # paddlex (../../../setup.py) # scikit-image -paddlepaddle @ https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.1.1-cp310-cp310-linux_x86_64.whl +paddlepaddle @ https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.2.1-cp310-cp310-linux_x86_64.whl # via -r requirements/cpu.in pandas==1.3.5 # via paddlex (../../../setup.py) @@ -295,6 +296,8 @@ python-dateutil==2.9.0.post0 # via # matplotlib # pandas +python-docx==1.2.0 + # via paddlex (../../../setup.py) pytz==2025.1 # via pandas pyyaml==6.0.2 @@ -326,8 +329,11 @@ ruamel-yaml==0.18.10 # via paddlex (../../../setup.py) ruamel-yaml-clib==0.2.12 # via ruamel-yaml 
-safetensors==0.6.2 - # via paddlex (../../../setup.py) +safetensors @ https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl + # via + # -r requirements/app.in + # paddlepaddle + # paddlex (../../../setup.py) scikit-image==0.24.0 # via paddlex (../../../setup.py) scikit-learn==1.6.1 @@ -396,6 +402,7 @@ typing-extensions==4.12.2 # paddlex (../../../setup.py) # pydantic # pydantic-core + # python-docx # sqlalchemy # typing-inspect # uvicorn diff --git a/deploy/hps/server_env/requirements/gpu.in b/deploy/hps/server_env/requirements/gpu.in index 152945b4c3..f2cbd2c900 100644 --- a/deploy/hps/server_env/requirements/gpu.in +++ b/deploy/hps/server_env/requirements/gpu.in @@ -1 +1 @@ -paddlepaddle-gpu @ https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/deps/paddlepaddle/paddlepaddle_gpu-3.1.1%2Bfc-cp310-cp310-linux_x86_64.whl +paddlepaddle-gpu @ https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/deps/paddlepaddle/paddlepaddle_gpu-3.2.1%2Bfc-cp310-cp310-linux_x86_64.whl diff --git a/deploy/hps/server_env/requirements/gpu.txt b/deploy/hps/server_env/requirements/gpu.txt index 3484d72f48..8f6366b71d 100644 --- a/deploy/hps/server_env/requirements/gpu.txt +++ b/deploy/hps/server_env/requirements/gpu.txt @@ -171,6 +171,7 @@ lxml==5.3.1 # via # paddlex (../../../setup.py) # premailer + # python-docx markupsafe==3.0.2 # via jinja2 marshmallow==3.26.1 @@ -238,7 +239,7 @@ packaging==24.2 # matplotlib # paddlex (../../../setup.py) # scikit-image -paddlepaddle-gpu @ https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/deps/paddlepaddle/paddlepaddle_gpu-3.1.1%2Bfc-cp310-cp310-linux_x86_64.whl +paddlepaddle-gpu @ https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/deps/paddlepaddle/paddlepaddle_gpu-3.2.1%2Bfc-cp310-cp310-linux_x86_64.whl # via -r requirements/gpu.in pandas==1.3.5 # via paddlex (../../../setup.py) @@ -295,6 +296,8 @@ python-dateutil==2.9.0.post0 
# via # matplotlib # pandas +python-docx==1.2.0 + # via paddlex (../../../setup.py) pytz==2025.1 # via pandas pyyaml==6.0.2 @@ -326,8 +329,11 @@ ruamel-yaml==0.18.10 # via paddlex (../../../setup.py) ruamel-yaml-clib==0.2.12 # via ruamel-yaml -safetensors==0.6.2 - # via paddlex (../../../setup.py) +safetensors @ https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl + # via + # -r requirements/app.in + # paddlepaddle-gpu + # paddlex (../../../setup.py) scikit-image==0.24.0 # via paddlex (../../../setup.py) scikit-learn==1.6.1 @@ -396,6 +402,7 @@ typing-extensions==4.12.2 # paddlex (../../../setup.py) # pydantic # pydantic-core + # python-docx # sqlalchemy # starlette # typing-inspect diff --git a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py index 0495a23b2b..e7e1487506 100644 --- a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py @@ -42,6 +42,7 @@ import paddle.nn as nn import paddle.nn.functional as F +from ......utils.env import get_gpu_compute_capability from ....common.vlm.activations import ACT2FN from ....common.vlm.transformers import PretrainedModel from ....common.vlm.transformers.model_outputs import ( @@ -137,6 +138,9 @@ def __init__(self, config): self.q_proj = nn.Linear(self.embed_dim, self.embed_dim) self.out_proj = nn.Linear(self.embed_dim, self.embed_dim) + cap = get_gpu_compute_capability() + self._supports_sdpa = cap >= (8, 0) if cap is not None else False + def forward( self, hidden_states: paddle.Tensor, # [B, L, D] @@ -162,7 +166,7 @@ def forward( cos, sin = rope_emb q, k = apply_rotary_pos_emb_vision(q, k, cos, sin) - if q.dtype == paddle.float32: + if not self._supports_sdpa or q.dtype == paddle.float32: # → [B, H, L, Dh] q = q.transpose([0, 2, 1, 3]) k = k.transpose([0, 2, 1, 3]) diff --git 
a/paddlex/inference/utils/hpi.py b/paddlex/inference/utils/hpi.py index 613fe4a293..ec8e958654 100644 --- a/paddlex/inference/utils/hpi.py +++ b/paddlex/inference/utils/hpi.py @@ -24,6 +24,7 @@ from pydantic import BaseModel, Field from typing_extensions import Annotated, TypeAlias +from ...utils import logging from ...utils.deps import function_requires_deps, is_paddle2onnx_plugin_available from ...utils.env import get_paddle_cuda_version, get_paddle_version from ...utils.flags import USE_PIR_TRT @@ -156,6 +157,14 @@ def suggest_inference_backend_and_config( return None, f"Inference backend {repr(hpi_config.backend)} is unavailable." paddle_version = get_paddle_version() + + if paddle_version[:3] >= (3, 1, 0): + logging.debug( + "Paddle version %s is not supported yet. The prior knowledge of Paddle 3.1.1 will be used.", + paddle_version, + ) + paddle_version = (3, 1, 1, None) + if (3, 0) <= paddle_version[:2] <= (3, 1) and paddle_version[3] is None: if paddle_version[2] == 0: paddle_version = f"paddle{paddle_version[0]}{paddle_version[1]}" From d8719aa1b534f38073f45bea6f901106217a314b Mon Sep 17 00:00:00 2001 From: zhang-prog <69562787+zhang-prog@users.noreply.github.com> Date: Mon, 24 Nov 2025 20:12:01 +0800 Subject: [PATCH 46/49] update fd config (#4760) --- paddlex/inference/genai/configs/paddleocr_vl_09b.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/inference/genai/configs/paddleocr_vl_09b.py b/paddlex/inference/genai/configs/paddleocr_vl_09b.py index cee4c76059..bbc0204249 100644 --- a/paddlex/inference/genai/configs/paddleocr_vl_09b.py +++ b/paddlex/inference/genai/configs/paddleocr_vl_09b.py @@ -20,7 +20,7 @@ def get_config(backend): "max-model-len": 16384, "max-num-batched-tokens": 16384, "max-num-seqs": 256, - "workers": 2, + "workers": 4, "graph-optimization-config": '{"graph_opt_level":0, "use_cudagraph":true}', } elif backend == "vllm": From 1bec5c29e8c67fbeac62fa33a9354c159f435095 Mon Sep 17 00:00:00 2001 From: 
Bobholamovic Date: Mon, 24 Nov 2025 13:11:53 +0000 Subject: [PATCH 47/49] Bump version to 3.3.10 --- paddlex/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/.version b/paddlex/.version index 3b47f2e4f8..5f6fc5edc2 100644 --- a/paddlex/.version +++ b/paddlex/.version @@ -1 +1 @@ -3.3.9 +3.3.10 From acdc0534a8de1fa53f6400c3e95d107cad003c09 Mon Sep 17 00:00:00 2001 From: Yugsolanki Date: Wed, 26 Nov 2025 19:41:48 +0530 Subject: [PATCH 48/49] Fix: Update imports to resolve ModuleNotFoundError for 'langchain.docstore' --- paddlex/inference/pipelines/components/retriever/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddlex/inference/pipelines/components/retriever/base.py b/paddlex/inference/pipelines/components/retriever/base.py index a348836836..7c42c32d15 100644 --- a/paddlex/inference/pipelines/components/retriever/base.py +++ b/paddlex/inference/pipelines/components/retriever/base.py @@ -22,8 +22,8 @@ from .....utils.subclass_register import AutoRegisterABCMetaClass if is_dep_available("langchain"): - from langchain.docstore.document import Document - from langchain.text_splitter import RecursiveCharacterTextSplitter + from langchain_core.documents.base import Document + from langchain_classic.text_splitter import RecursiveCharacterTextSplitter if is_dep_available("langchain-community"): from langchain_community import vectorstores from langchain_community.vectorstores import FAISS From d50bb5f801fed8fef01ff8fb2f86ea5ba32ab052 Mon Sep 17 00:00:00 2001 From: Yugsolanki Date: Wed, 26 Nov 2025 20:10:06 +0530 Subject: [PATCH 49/49] Refactor: Eliminate langchain_classic dependency using core langchain modules --- paddlex/inference/pipelines/components/retriever/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/inference/pipelines/components/retriever/base.py b/paddlex/inference/pipelines/components/retriever/base.py index 7c42c32d15..a35b5577ae 100644 --- 
a/paddlex/inference/pipelines/components/retriever/base.py +++ b/paddlex/inference/pipelines/components/retriever/base.py @@ -23,7 +23,7 @@ if is_dep_available("langchain"): from langchain_core.documents.base import Document - from langchain_classic.text_splitter import RecursiveCharacterTextSplitter + from langchain_text_splitters.character import RecursiveCharacterTextSplitter if is_dep_available("langchain-community"): from langchain_community import vectorstores from langchain_community.vectorstores import FAISS