diff --git a/docling/datamodel/accelerator_options.py b/docling/datamodel/accelerator_options.py
index 1b0ea8cfe5..d3d2576ce6 100644
--- a/docling/datamodel/accelerator_options.py
+++ b/docling/datamodel/accelerator_options.py
@@ -17,6 +17,7 @@ class AcceleratorDevice(str, Enum):
     CPU = "cpu"
     CUDA = "cuda"
     MPS = "mps"
+    XPU = "xpu"
 
 
 class AcceleratorOptions(BaseSettings):
@@ -30,13 +31,13 @@ class AcceleratorOptions(BaseSettings):
 
     @field_validator("device")
     def validate_device(cls, value):
-        # "auto", "cpu", "cuda", "mps", or "cuda:N"
+        # "auto", "cpu", "cuda", "mps", "xpu", or "cuda:N"
         if value in {d.value for d in AcceleratorDevice} or re.match(
             r"^cuda(:\d+)?$", value
         ):
             return value
         raise ValueError(
-            "Invalid device option. Use 'auto', 'cpu', 'mps', 'cuda', or 'cuda:N'."
+            "Invalid device option. Use 'auto', 'cpu', 'mps', 'xpu', 'cuda', or 'cuda:N'."
         )
 
     @model_validator(mode="before")
diff --git a/docling/datamodel/layout_model_specs.py b/docling/datamodel/layout_model_specs.py
index 93eee50d9f..e4bcafafb8 100644
--- a/docling/datamodel/layout_model_specs.py
+++ b/docling/datamodel/layout_model_specs.py
@@ -19,6 +19,7 @@ class LayoutModelConfig(BaseModel):
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ]
 
     @property
diff --git a/docling/datamodel/pipeline_options_asr_model.py b/docling/datamodel/pipeline_options_asr_model.py
index 24b161ada1..217109eb5f 100644
--- a/docling/datamodel/pipeline_options_asr_model.py
+++ b/docling/datamodel/pipeline_options_asr_model.py
@@ -39,6 +39,7 @@ class InlineAsrOptions(BaseAsrOptions):
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ]
 
     @property
diff --git a/docling/datamodel/pipeline_options_vlm_model.py b/docling/datamodel/pipeline_options_vlm_model.py
index d90b8d7010..d0701a1063 100644
--- a/docling/datamodel/pipeline_options_vlm_model.py
+++ b/docling/datamodel/pipeline_options_vlm_model.py
@@ -93,6 +93,7 @@ class InlineVlmOptions(BaseVlmOptions):
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ]
 
     stop_strings: List[str] = []
diff --git a/docling/datamodel/vlm_model_specs.py b/docling/datamodel/vlm_model_specs.py
index 8b03c2c219..b811a2d4d6 100644
--- a/docling/datamodel/vlm_model_specs.py
+++ b/docling/datamodel/vlm_model_specs.py
@@ -28,6 +28,7 @@
     supported_devices=[
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     extra_generation_config=dict(skip_special_tokens=False),
     scale=2.0,
@@ -92,6 +93,7 @@
     supported_devices=[
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     torch_dtype="bfloat16",
     scale=2.0,
@@ -106,6 +108,7 @@
     inference_framework=InferenceFramework.VLLM,
     supported_devices=[
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -123,6 +126,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         # AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     torch_dtype="bfloat16",
     scale=2.0,
@@ -150,6 +154,7 @@
     inference_framework=InferenceFramework.VLLM,
     supported_devices=[
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -167,6 +172,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -179,6 +185,7 @@
     inference_framework=InferenceFramework.VLLM,
     supported_devices=[
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -201,7 +208,11 @@
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS,
     transformers_model_type=TransformersModelType.AUTOMODEL_VISION2SEQ,
-    supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
+    supported_devices=[
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
+    ],
     scale=2.0,
     temperature=0.0,
 )
@@ -224,7 +235,11 @@
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS,
     transformers_model_type=TransformersModelType.AUTOMODEL_CAUSALLM,
-    supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
+    supported_devices=[
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
+    ],
     scale=2.0,
     temperature=0.0,
     extra_generation_config=dict(num_logits_to_keep=0),
@@ -253,6 +268,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         # AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -295,6 +311,7 @@
         AcceleratorDevice.CUDA,
         AcceleratorDevice.CPU,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -313,6 +330,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
diff --git a/docling/models/code_formula_model.py b/docling/models/code_formula_model.py
index 1d59761d81..05c74506c9 100644
--- a/docling/models/code_formula_model.py
+++ b/docling/models/code_formula_model.py
@@ -97,7 +97,11 @@ def __init__(
         if self.enabled:
             self.device = decide_device(
                 accelerator_options.device,
-                supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
+                supported_devices=[
+                    AcceleratorDevice.CPU,
+                    AcceleratorDevice.CUDA,
+                    AcceleratorDevice.XPU,
+                ],
             )
 
             if artifacts_path is None:
diff --git a/docling/utils/accelerator_utils.py b/docling/utils/accelerator_utils.py
index 826b3657c5..3e50ea5811 100644
--- a/docling/utils/accelerator_utils.py
+++ b/docling/utils/accelerator_utils.py
@@ -22,6 +22,7 @@ def decide_device(
 
     has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
     has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    has_xpu = hasattr(torch, "xpu") and torch.xpu.is_available()
 
     if supported_devices is not None:
         if has_cuda and AcceleratorDevice.CUDA not in supported_devices:
@@ -34,12 +35,19 @@
             f"Removing MPS from available devices because it is not in {supported_devices=}"
         )
         has_mps = False
+        if has_xpu and AcceleratorDevice.XPU not in supported_devices:
+            _log.info(
+                f"Removing XPU from available devices because it is not in {supported_devices=}"
+            )
+            has_xpu = False
 
     if accelerator_device == AcceleratorDevice.AUTO.value:  # Handle 'auto'
         if has_cuda:
             device = "cuda:0"
         elif has_mps:
             device = "mps"
+        elif has_xpu:
+            device = "xpu"
 
     elif accelerator_device.startswith("cuda"):
         if has_cuda:
@@ -71,6 +79,12 @@
         else:
             _log.warning("MPS is not available in the system. Fall back to 'CPU'")
 
+    elif accelerator_device == AcceleratorDevice.XPU.value:
+        if has_xpu:
+            device = "xpu"
+        else:
+            _log.warning("XPU is not available in the system. Fall back to 'CPU'")
+
     elif accelerator_device == AcceleratorDevice.CPU.value:
         device = "cpu"
 
diff --git a/docs/examples/compare_vlm_models.py b/docs/examples/compare_vlm_models.py
index c795947c46..264b524369 100644
--- a/docs/examples/compare_vlm_models.py
+++ b/docs/examples/compare_vlm_models.py
@@ -141,7 +141,11 @@ def convert(sources: list[Path], converter: DocumentConverter):
         response_format=ResponseFormat.MARKDOWN,
         inference_framework=InferenceFramework.TRANSFORMERS,
         transformers_model_type=TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT,
-        supported_devices=[AcceleratorDevice.CUDA, AcceleratorDevice.CPU],
+        supported_devices=[
+            AcceleratorDevice.CUDA,
+            AcceleratorDevice.CPU,
+            AcceleratorDevice.XPU,
+        ],
         scale=2.0,
         temperature=0.0,
     )
@@ -154,7 +158,11 @@
         inference_framework=InferenceFramework.TRANSFORMERS,
         transformers_model_type=TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT,
         transformers_prompt_style=TransformersPromptStyle.RAW,
-        supported_devices=[AcceleratorDevice.CUDA, AcceleratorDevice.CPU],
+        supported_devices=[
+            AcceleratorDevice.CUDA,
+            AcceleratorDevice.CPU,
+            AcceleratorDevice.XPU,
+        ],
         scale=2.0,
         temperature=0.0,
     )
diff --git a/docs/examples/run_with_accelerator.py b/docs/examples/run_with_accelerator.py
index 5b78e5307c..ebbdd5590c 100644
--- a/docs/examples/run_with_accelerator.py
+++ b/docs/examples/run_with_accelerator.py
@@ -1,5 +1,5 @@
 # %% [markdown]
-# Run conversion with an explicit accelerator configuration (CPU/MPS/CUDA).
+# Run conversion with an explicit accelerator configuration (CPU/MPS/CUDA/XPU).
 #
 # What this example does
 # - Shows how to select the accelerator device and thread count.
@@ -7,13 +7,13 @@
 #
 # How to run
 # - From the repo root: `python docs/examples/run_with_accelerator.py`.
-# - Toggle the commented `AcceleratorOptions` examples to try AUTO/MPS/CUDA.
+# - Toggle the commented `AcceleratorOptions` examples to try AUTO/MPS/CUDA/XPU.
 #
 # Notes
 # - EasyOCR does not support `cuda:N` device selection (defaults to `cuda:0`).
 # - `settings.debug.profile_pipeline_timings = True` prints profiling details.
-# - `AcceleratorDevice.MPS` is macOS-only; `CUDA` requires a compatible GPU and
-#   CUDA-enabled PyTorch build. CPU mode works everywhere.
+# - `AcceleratorDevice.MPS` is macOS-only; `CUDA` and `XPU` require a compatible GPU and
+#   CUDA/XPU-enabled PyTorch build. CPU mode works everywhere.
 
 # %%
 
@@ -43,6 +43,9 @@ def main():
     #     num_threads=8, device=AcceleratorDevice.MPS
     # )
     # accelerator_options = AcceleratorOptions(
+    #     num_threads=8, device=AcceleratorDevice.XPU
+    # )
+    # accelerator_options = AcceleratorOptions(
     #     num_threads=8, device=AcceleratorDevice.CUDA
     # )
 
diff --git a/docs/usage/vision_models.md b/docs/usage/vision_models.md
index d181ca950e..2cd0bdd831 100644
--- a/docs/usage/vision_models.md
+++ b/docs/usage/vision_models.md
@@ -105,6 +105,7 @@ pipeline_options = VlmPipelineOptions(
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
diff --git a/tests/test_options.py b/tests/test_options.py
index 1e06378f99..2286a5c493 100644
--- a/tests/test_options.py
+++ b/tests/test_options.py
@@ -50,10 +50,13 @@ def test_accelerator_options():
     # Use API
     ao2 = AcceleratorOptions(num_threads=2, device=AcceleratorDevice.MPS)
     ao3 = AcceleratorOptions(num_threads=3, device=AcceleratorDevice.CUDA)
+    ao4 = AcceleratorOptions(num_threads=4, device=AcceleratorDevice.XPU)
     assert ao2.num_threads == 2
     assert ao2.device == AcceleratorDevice.MPS
     assert ao3.num_threads == 3
     assert ao3.device == AcceleratorDevice.CUDA
+    assert ao4.num_threads == 4
+    assert ao4.device == AcceleratorDevice.XPU
 
     # Use envvars (regular + alternative) and default values
    os.environ["OMP_NUM_THREADS"] = "1"
@@ -67,15 +70,15 @@
 
     # Use envvars and override in init
     os.environ["DOCLING_DEVICE"] = "cpu"
-    ao4 = AcceleratorOptions(num_threads=5, device=AcceleratorDevice.MPS)
-    assert ao4.num_threads == 5
-    assert ao4.device == AcceleratorDevice.MPS
+    ao5 = AcceleratorOptions(num_threads=5, device=AcceleratorDevice.MPS)
+    assert ao5.num_threads == 5
+    assert ao5.device == AcceleratorDevice.MPS
 
     # Use regular and alternative envvar
     os.environ["DOCLING_NUM_THREADS"] = "2"
-    ao5 = AcceleratorOptions()
-    assert ao5.num_threads == 2
-    assert ao5.device == AcceleratorDevice.CPU
+    ao6 = AcceleratorOptions()
+    assert ao6.num_threads == 2
+    assert ao6.device == AcceleratorDevice.CPU
 
     # Use wrong values
     is_exception = False
@@ -91,9 +94,9 @@
     del os.environ["DOCLING_NUM_THREADS"]
     del os.environ["DOCLING_DEVICE"]
     os.environ["OMP_NUM_THREADS"] = "wrong"
-    ao6 = AcceleratorOptions()
-    assert ao6.num_threads == 4
-    assert ao6.device == AcceleratorDevice.AUTO
+    ao7 = AcceleratorOptions()
+    assert ao7.num_threads == 4
+    assert ao7.device == AcceleratorDevice.AUTO
 
 
 def test_e2e_conversions(test_doc_path):
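
Usage sketch (not part of the patch): with `XPU` added to `AcceleratorDevice`, selecting it follows the same pattern as CUDA/MPS. This is a minimal sketch mirroring `docs/examples/run_with_accelerator.py`; the input path is a placeholder, and the standard docling converter wiring (`DocumentConverter`, `PdfFormatOption`, `PdfPipelineOptions`) is assumed unchanged by this diff.

```python
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption

# Request the Intel XPU backend explicitly; decide_device() warns and
# falls back to CPU if torch.xpu reports no device at runtime.
accelerator_options = AcceleratorOptions(num_threads=8, device=AcceleratorDevice.XPU)

pipeline_options = PdfPipelineOptions()
pipeline_options.accelerator_options = accelerator_options

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
    }
)
result = converter.convert("input.pdf")  # placeholder input path
```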
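Device-resolution sketch: per the `accelerator_utils.py` hunks, `decide_device` now treats XPU as the last `auto` candidate (after CUDA and MPS) and drops it when a model's `supported_devices` does not list it. A small check, assuming a PyTorch build that ships the `torch.xpu` module:

```python
import torch

from docling.datamodel.accelerator_options import AcceleratorDevice
from docling.utils.accelerator_utils import decide_device

# "auto" prefers cuda:0, then mps, then xpu, then cpu.
print(decide_device("auto"))

# A model that does not list XPU in supported_devices resolves to CPU even
# when an XPU is present; decide_device logs the removal at INFO level.
print(decide_device("xpu", supported_devices=[AcceleratorDevice.CPU]))

# Guarded availability check, mirroring what decide_device does internally.
print(hasattr(torch, "xpu") and torch.xpu.is_available())
```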
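Config-surface sketch: the validator change means the plain string `"xpu"` is accepted wherever a device string is, including the `DOCLING_DEVICE` environment variable exercised in `tests/test_options.py`. Since `AcceleratorDevice` is a `str` enum, comparing against the enum member works either way:

```python
import os

from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions

# "xpu" now passes validate_device(), as an enum member or a plain string.
assert AcceleratorOptions(device="xpu").device == AcceleratorDevice.XPU

# The same value can come from the environment, as in tests/test_options.py.
os.environ["DOCLING_DEVICE"] = "xpu"
assert AcceleratorOptions().device == AcceleratorDevice.XPU
del os.environ["DOCLING_DEVICE"]
```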