docling/datamodel/accelerator_options.py (3 additions & 2 deletions)

@@ -17,6 +17,7 @@ class AcceleratorDevice(str, Enum):
     CPU = "cpu"
     CUDA = "cuda"
     MPS = "mps"
+    XPU = "xpu"


 class AcceleratorOptions(BaseSettings):
@@ -30,13 +31,13 @@ class AcceleratorOptions(BaseSettings):

     @field_validator("device")
     def validate_device(cls, value):
-        # "auto", "cpu", "cuda", "mps", or "cuda:N"
+        # "auto", "cpu", "cuda", "mps", "xpu", or "cuda:N"
         if value in {d.value for d in AcceleratorDevice} or re.match(
             r"^cuda(:\d+)?$", value
         ):
             return value
         raise ValueError(
-            "Invalid device option. Use 'auto', 'cpu', 'mps', 'cuda', or 'cuda:N'."
+            "Invalid device option. Use 'auto', 'cpu', 'mps', 'xpu', 'cuda', or 'cuda:N'."
        )

     @model_validator(mode="before")
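
With the new enum member, validate_device accepts "xpu" both as an AcceleratorDevice value and as a plain string; the cuda:N regex is unchanged, so indexed selection remains CUDA-only. A minimal sketch of what the patched validator permits (imports as in the diff; values illustrative):

from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions

# Both spellings pass the patched validator.
opts_enum = AcceleratorOptions(device=AcceleratorDevice.XPU)
opts_str = AcceleratorOptions(device="xpu")

# Only CUDA has an indexed form: "cuda:1" matches the regex,
# while "xpu:0" would raise the ValueError above.
opts_cuda = AcceleratorOptions(device="cuda:1")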
docling/datamodel/layout_model_specs.py (1 addition & 0 deletions)

@@ -19,6 +19,7 @@ class LayoutModelConfig(BaseModel):
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ]

     @property
docling/datamodel/pipeline_options_asr_model.py (1 addition & 0 deletions)

@@ -39,6 +39,7 @@ class InlineAsrOptions(BaseAsrOptions):
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ]

     @property
docling/datamodel/pipeline_options_vlm_model.py (1 addition & 0 deletions)

@@ -93,6 +93,7 @@ class InlineVlmOptions(BaseVlmOptions):
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ]

     stop_strings: List[str] = []
docling/datamodel/vlm_model_specs.py (20 additions & 2 deletions)

@@ -28,6 +28,7 @@
     supported_devices=[
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     extra_generation_config=dict(skip_special_tokens=False),
     scale=2.0,
@@ -92,6 +93,7 @@
     supported_devices=[
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     torch_dtype="bfloat16",
     scale=2.0,
@@ -106,6 +108,7 @@
     inference_framework=InferenceFramework.VLLM,
     supported_devices=[
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -123,6 +126,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         # AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     torch_dtype="bfloat16",
     scale=2.0,
@@ -150,6 +154,7 @@
     inference_framework=InferenceFramework.VLLM,
     supported_devices=[
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -167,6 +172,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -179,6 +185,7 @@
     inference_framework=InferenceFramework.VLLM,
     supported_devices=[
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -201,7 +208,11 @@
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS,
     transformers_model_type=TransformersModelType.AUTOMODEL_VISION2SEQ,
-    supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
+    supported_devices=[
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
+    ],
     scale=2.0,
     temperature=0.0,
 )
@@ -224,7 +235,11 @@
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS,
     transformers_model_type=TransformersModelType.AUTOMODEL_CAUSALLM,
-    supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
+    supported_devices=[
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
+    ],
     scale=2.0,
     temperature=0.0,
     extra_generation_config=dict(num_logits_to_keep=0),
@@ -253,6 +268,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         # AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -295,6 +311,7 @@
         AcceleratorDevice.CUDA,
         AcceleratorDevice.CPU,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -313,6 +330,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
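
Each spec above feeds its supported_devices list into decide_device when a pipeline is built, so the specs that now list AcceleratorDevice.XPU can resolve to an Intel GPU. A hedged sketch of selecting one of them explicitly (the VlmPipelineOptions import path and the SMOLDOCLING_TRANSFORMERS spec name are assumed from docling's public API; any XPU-capable spec would do):

from docling.datamodel import vlm_model_specs
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.pipeline_options import VlmPipelineOptions

# A spec whose supported_devices includes XPU can be pinned to it;
# specs without XPU would fall back to CPU on an XPU-only machine.
pipeline_options = VlmPipelineOptions(
    vlm_options=vlm_model_specs.SMOLDOCLING_TRANSFORMERS,
    accelerator_options=AcceleratorOptions(device=AcceleratorDevice.XPU),
)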
docling/models/code_formula_model.py (5 additions & 1 deletion)

@@ -97,7 +97,11 @@ def __init__(
         if self.enabled:
             self.device = decide_device(
                 accelerator_options.device,
-                supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
+                supported_devices=[
+                    AcceleratorDevice.CPU,
+                    AcceleratorDevice.CUDA,
+                    AcceleratorDevice.XPU,
+                ],
             )

             if artifacts_path is None:
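
This is the gating half of the feature: decide_device only hands out a device that both exists on the machine and appears in the caller's allow-list. A small sketch mirroring the patched call (names as in the diff):

from docling.datamodel.accelerator_options import AcceleratorDevice
from docling.utils.accelerator_utils import decide_device

# With XPU in the allow-list, "auto" on an Intel-GPU machine now
# resolves to "xpu" instead of falling through to "cpu".
device = decide_device(
    "auto",
    supported_devices=[
        AcceleratorDevice.CPU,
        AcceleratorDevice.CUDA,
        AcceleratorDevice.XPU,
    ],
)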
docling/utils/accelerator_utils.py (14 additions & 0 deletions)

@@ -22,6 +22,7 @@ def decide_device(

     has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
     has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    has_xpu = torch.xpu.is_available()

     if supported_devices is not None:
         if has_cuda and AcceleratorDevice.CUDA not in supported_devices:
@@ -34,12 +35,19 @@ def decide_device(
                 f"Removing MPS from available devices because it is not in {supported_devices=}"
             )
             has_mps = False
+        if has_xpu and AcceleratorDevice.XPU not in supported_devices:
+            _log.info(
+                f"Removing XPU from available devices because it is not in {supported_devices=}"
+            )
+            has_xpu = False

     if accelerator_device == AcceleratorDevice.AUTO.value:  # Handle 'auto'
         if has_cuda:
             device = "cuda:0"
         elif has_mps:
             device = "mps"
+        elif has_xpu:
+            device = "xpu"

     elif accelerator_device.startswith("cuda"):
         if has_cuda:
@@ -71,6 +79,12 @@ def decide_device(
         else:
             _log.warning("MPS is not available in the system. Fall back to 'CPU'")

+    elif accelerator_device == AcceleratorDevice.XPU.value:
+        if has_xpu:
+            device = "xpu"
+        else:
+            _log.warning("XPU is not available in the system. Fall back to 'CPU'")
+
     elif accelerator_device == AcceleratorDevice.CPU.value:
         device = "cpu"

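
Under "auto" the resolution order is therefore CUDA, then MPS, then XPU, with "cpu" as the default when nothing matches; an explicit "xpu" request falls back to CPU with a warning when no XPU is present. A sketch of the observable behavior (outcomes depend on the local PyTorch build, so the comments describe one possible machine):

from docling.utils.accelerator_utils import decide_device

# On a machine where only torch.xpu.is_available() is True:
#   decide_device("auto") -> "xpu"
#   decide_device("xpu")  -> "xpu"
#   decide_device("cuda") -> "cpu"   (warning logged)
#   decide_device("mps")  -> "cpu"   (warning logged)
print(decide_device("auto"))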
docs/examples/compare_vlm_models.py (10 additions & 2 deletions)

@@ -141,7 +141,11 @@ def convert(sources: list[Path], converter: DocumentConverter):
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS,
     transformers_model_type=TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT,
-    supported_devices=[AcceleratorDevice.CUDA, AcceleratorDevice.CPU],
+    supported_devices=[
+        AcceleratorDevice.CUDA,
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.XPU,
+    ],
     scale=2.0,
     temperature=0.0,
 )
@@ -154,7 +158,11 @@ def convert(sources: list[Path], converter: DocumentConverter):
     inference_framework=InferenceFramework.TRANSFORMERS,
     transformers_model_type=TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT,
     transformers_prompt_style=TransformersPromptStyle.RAW,
-    supported_devices=[AcceleratorDevice.CUDA, AcceleratorDevice.CPU],
+    supported_devices=[
+        AcceleratorDevice.CUDA,
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.XPU,
+    ],
     scale=2.0,
     temperature=0.0,
 )
docs/examples/run_with_accelerator.py (7 additions & 4 deletions)

@@ -1,19 +1,19 @@
 # %% [markdown]
-# Run conversion with an explicit accelerator configuration (CPU/MPS/CUDA).
+# Run conversion with an explicit accelerator configuration (CPU/MPS/CUDA/XPU).
 #
 # What this example does
 # - Shows how to select the accelerator device and thread count.
 # - Enables OCR and table structure to exercise compute paths, and prints timings.
 #
 # How to run
 # - From the repo root: `python docs/examples/run_with_accelerator.py`.
-# - Toggle the commented `AcceleratorOptions` examples to try AUTO/MPS/CUDA.
+# - Toggle the commented `AcceleratorOptions` examples to try AUTO/MPS/CUDA/XPU.
 #
 # Notes
 # - EasyOCR does not support `cuda:N` device selection (defaults to `cuda:0`).
 # - `settings.debug.profile_pipeline_timings = True` prints profiling details.
-# - `AcceleratorDevice.MPS` is macOS-only; `CUDA` requires a compatible GPU and
-#   CUDA-enabled PyTorch build. CPU mode works everywhere.
+# - `AcceleratorDevice.MPS` is macOS-only; `CUDA` and `XPU` require a compatible GPU and
+#   CUDA/XPU-enabled PyTorch build. CPU mode works everywhere.

 # %%

@@ -43,6 +43,9 @@ def main():
     #     num_threads=8, device=AcceleratorDevice.MPS
     # )
     # accelerator_options = AcceleratorOptions(
+    #     num_threads=8, device=AcceleratorDevice.XPU
+    # )
+    # accelerator_options = AcceleratorOptions(
     #     num_threads=8, device=AcceleratorDevice.CUDA
     # )

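
Uncommenting the XPU block wires the new device through the same pipeline as the other accelerators. A condensed sketch of that configuration (imports and pipeline wiring follow the full example file):

from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption

accelerator_options = AcceleratorOptions(num_threads=8, device=AcceleratorDevice.XPU)

pipeline_options = PdfPipelineOptions()
pipeline_options.accelerator_options = accelerator_options
pipeline_options.do_ocr = True
pipeline_options.do_table_structure = True

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
    }
)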
docs/usage/vision_models.md (1 addition & 0 deletions)

@@ -105,6 +105,7 @@ pipeline_options = VlmPipelineOptions(
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
tests/test_options.py (12 additions & 9 deletions)

@@ -50,10 +50,13 @@ def test_accelerator_options():
     # Use API
     ao2 = AcceleratorOptions(num_threads=2, device=AcceleratorDevice.MPS)
     ao3 = AcceleratorOptions(num_threads=3, device=AcceleratorDevice.CUDA)
+    ao4 = AcceleratorOptions(num_threads=4, device=AcceleratorDevice.XPU)
     assert ao2.num_threads == 2
     assert ao2.device == AcceleratorDevice.MPS
     assert ao3.num_threads == 3
     assert ao3.device == AcceleratorDevice.CUDA
+    assert ao4.num_threads == 4
+    assert ao4.device == AcceleratorDevice.XPU

     # Use envvars (regular + alternative) and default values
     os.environ["OMP_NUM_THREADS"] = "1"
@@ -67,15 +70,15 @@ def test_accelerator_options():

     # Use envvars and override in init
     os.environ["DOCLING_DEVICE"] = "cpu"
-    ao4 = AcceleratorOptions(num_threads=5, device=AcceleratorDevice.MPS)
-    assert ao4.num_threads == 5
-    assert ao4.device == AcceleratorDevice.MPS
+    ao5 = AcceleratorOptions(num_threads=5, device=AcceleratorDevice.MPS)
+    assert ao5.num_threads == 5
+    assert ao5.device == AcceleratorDevice.MPS

     # Use regular and alternative envvar
     os.environ["DOCLING_NUM_THREADS"] = "2"
-    ao5 = AcceleratorOptions()
-    assert ao5.num_threads == 2
-    assert ao5.device == AcceleratorDevice.CPU
+    ao6 = AcceleratorOptions()
+    assert ao6.num_threads == 2
+    assert ao6.device == AcceleratorDevice.CPU

     # Use wrong values
     is_exception = False
@@ -91,9 +94,9 @@ def test_accelerator_options():
     del os.environ["DOCLING_NUM_THREADS"]
     del os.environ["DOCLING_DEVICE"]
     os.environ["OMP_NUM_THREADS"] = "wrong"
-    ao6 = AcceleratorOptions()
-    assert ao6.num_threads == 4
-    assert ao6.device == AcceleratorDevice.AUTO
+    ao7 = AcceleratorOptions()
+    assert ao7.num_threads == 4
+    assert ao7.device == AcceleratorDevice.AUTO


 def test_e2e_conversions(test_doc_path):
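
The renumbering above makes room for the new XPU case; the env-var path goes through the same validator, so the string form works there too. A hedged sketch in the same style as the test (BaseSettings reads DOCLING_DEVICE exactly as the cpu case above shows):

import os

from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions

# The env var feeds the same "device" field the validator checks.
os.environ["DOCLING_DEVICE"] = "xpu"
assert AcceleratorOptions().device == "xpu"

# Explicit init arguments still override the environment.
ao = AcceleratorOptions(device=AcceleratorDevice.CPU)
assert ao.device == AcceleratorDevice.CPU
del os.environ["DOCLING_DEVICE"]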