From 4374a1e8ecd0a882057c9310927ee689c89e4a61 Mon Sep 17 00:00:00 2001
From: Wauplin <11801849+Wauplin@users.noreply.github.com>
Date: Sat, 6 Dec 2025 03:19:55 +0000
Subject: [PATCH] Update inference types (automated commit)

---
 .../en/package_reference/inference_types.md  | 24 +++++++
 .../ko/package_reference/inference_types.md  | 24 +++++++
 .../inference/_generated/types/__init__.py   | 12 ++++
 .../_generated/types/image_text_to_image.py  | 67 +++++++++++++++++++
 .../_generated/types/image_text_to_video.py  | 65 ++++++++++++++++++
 .../types/zero_shot_object_detection.py      |  1 +
 6 files changed, 193 insertions(+)
 create mode 100644 src/huggingface_hub/inference/_generated/types/image_text_to_image.py
 create mode 100644 src/huggingface_hub/inference/_generated/types/image_text_to_video.py

diff --git a/docs/source/en/package_reference/inference_types.md b/docs/source/en/package_reference/inference_types.md
index 535994221d..ac9d9b10ca 100644
--- a/docs/source/en/package_reference/inference_types.md
+++ b/docs/source/en/package_reference/inference_types.md
@@ -173,6 +173,30 @@ This part of the lib is still under development and will be improved in future r



+## image_text_to_image
+
+[[autodoc]] huggingface_hub.ImageTextToImageInput
+
+[[autodoc]] huggingface_hub.ImageTextToImageOutput
+
+[[autodoc]] huggingface_hub.ImageTextToImageParameters
+
+[[autodoc]] huggingface_hub.ImageTextToImageTargetSize
+
+
+
+## image_text_to_video
+
+[[autodoc]] huggingface_hub.ImageTextToVideoInput
+
+[[autodoc]] huggingface_hub.ImageTextToVideoOutput
+
+[[autodoc]] huggingface_hub.ImageTextToVideoParameters
+
+[[autodoc]] huggingface_hub.ImageTextToVideoTargetSize
+
+
+
 ## image_to_image

 [[autodoc]] huggingface_hub.ImageToImageInput
diff --git a/docs/source/ko/package_reference/inference_types.md b/docs/source/ko/package_reference/inference_types.md
index 84dda55956..c579e3e725 100644
--- a/docs/source/ko/package_reference/inference_types.md
+++ b/docs/source/ko/package_reference/inference_types.md
@@ -172,6 +172,30 @@ rendered properly in your Markdown viewer.



+## image_text_to_image[[huggingface_hub.ImageTextToImageInput]]
+
+[[autodoc]] huggingface_hub.ImageTextToImageInput
+
+[[autodoc]] huggingface_hub.ImageTextToImageOutput
+
+[[autodoc]] huggingface_hub.ImageTextToImageParameters
+
+[[autodoc]] huggingface_hub.ImageTextToImageTargetSize
+
+
+
+## image_text_to_video[[huggingface_hub.ImageTextToVideoInput]]
+
+[[autodoc]] huggingface_hub.ImageTextToVideoInput
+
+[[autodoc]] huggingface_hub.ImageTextToVideoOutput
+
+[[autodoc]] huggingface_hub.ImageTextToVideoParameters
+
+[[autodoc]] huggingface_hub.ImageTextToVideoTargetSize
+
+
+
 ## image_to_image[[huggingface_hub.ImageToImageInput]]

 [[autodoc]] huggingface_hub.ImageToImageInput
diff --git a/src/huggingface_hub/inference/_generated/types/__init__.py b/src/huggingface_hub/inference/_generated/types/__init__.py
index bfffc0ae3b..9f95dca555 100644
--- a/src/huggingface_hub/inference/_generated/types/__init__.py
+++ b/src/huggingface_hub/inference/_generated/types/__init__.py
@@ -77,6 +77,18 @@
     ImageSegmentationParameters,
     ImageSegmentationSubtask,
 )
+from .image_text_to_image import (
+    ImageTextToImageInput,
+    ImageTextToImageOutput,
+    ImageTextToImageParameters,
+    ImageTextToImageTargetSize,
+)
+from .image_text_to_video import (
+    ImageTextToVideoInput,
+    ImageTextToVideoOutput,
+    ImageTextToVideoParameters,
+    ImageTextToVideoTargetSize,
+)
 from .image_to_image import ImageToImageInput, ImageToImageOutput, ImageToImageParameters, ImageToImageTargetSize
 from .image_to_text import (
     ImageToTextEarlyStoppingEnum,
diff --git a/src/huggingface_hub/inference/_generated/types/image_text_to_image.py b/src/huggingface_hub/inference/_generated/types/image_text_to_image.py
new file mode 100644
index 0000000000..1ddd15335a
--- /dev/null
+++ b/src/huggingface_hub/inference/_generated/types/image_text_to_image.py
@@ -0,0 +1,67 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class ImageTextToImageTargetSize(BaseInferenceType):
+    """The size in pixels of the output image. This parameter is only supported by some
+    providers and for specific models. It will be ignored when unsupported.
+    """
+
+    height: int
+    width: int
+
+
+@dataclass_with_extra
+class ImageTextToImageParameters(BaseInferenceType):
+    """Additional inference parameters for Image Text To Image"""
+
+    guidance_scale: Optional[float] = None
+    """For diffusion models. A higher guidance scale value encourages the model to generate
+    images closely linked to the text prompt at the expense of lower image quality.
+    """
+    negative_prompt: Optional[str] = None
+    """One prompt to guide what NOT to include in image generation."""
+    num_inference_steps: Optional[int] = None
+    """For diffusion models. The number of denoising steps. More denoising steps usually lead to
+    a higher quality image at the expense of slower inference.
+    """
+    prompt: Optional[str] = None
+    """The text prompt to guide the image generation. Either this or inputs (image) must be
+    provided.
+    """
+    seed: Optional[int] = None
+    """Seed for the random number generator."""
+    target_size: Optional[ImageTextToImageTargetSize] = None
+    """The size in pixels of the output image. This parameter is only supported by some
+    providers and for specific models. It will be ignored when unsupported.
+    """
+
+
+@dataclass_with_extra
+class ImageTextToImageInput(BaseInferenceType):
+    """Inputs for Image Text To Image inference. Either inputs (image) or prompt (in parameters)
+    must be provided, or both.
+    """
+
+    inputs: Optional[str] = None
+    """The input image data as a base64-encoded string. If no `parameters` are provided, you can
+    also provide the image data as a raw bytes payload. Either this or prompt must be
+    provided.
+    """
+    parameters: Optional[ImageTextToImageParameters] = None
+    """Additional inference parameters for Image Text To Image"""
+
+
+@dataclass_with_extra
+class ImageTextToImageOutput(BaseInferenceType):
+    """Outputs of inference for the Image Text To Image task"""
+
+    image: Any
+    """The generated image returned as raw bytes in the payload."""
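
Note: the new module above only defines request/response dataclasses; this patch does not add a high-level `InferenceClient` method for the task. As a minimal usage sketch (imports taken from the `__init__.py` change above; the docs entries suggest the same names are also exposed as `huggingface_hub.ImageTextToImage*`; the image path, prompt, and parameter values are placeholders, not part of the patch), a payload could be assembled like this:

import base64

from huggingface_hub.inference._generated.types import (
    ImageTextToImageInput,
    ImageTextToImageParameters,
    ImageTextToImageTargetSize,
)

# Encode the input image as base64, as described by the `inputs` docstring.
with open("cat.png", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = ImageTextToImageInput(
    inputs=image_b64,
    parameters=ImageTextToImageParameters(
        prompt="Turn the cat into a watercolor painting",
        guidance_scale=7.5,
        num_inference_steps=30,
        seed=42,
        target_size=ImageTextToImageTargetSize(height=1024, width=1024),
    ),
)
print(payload.parameters.prompt)

All parameter fields default to None, so only the options a caller actually wants to send need to be set.
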
diff --git a/src/huggingface_hub/inference/_generated/types/image_text_to_video.py b/src/huggingface_hub/inference/_generated/types/image_text_to_video.py
new file mode 100644
index 0000000000..58b3a4f24e
--- /dev/null
+++ b/src/huggingface_hub/inference/_generated/types/image_text_to_video.py
@@ -0,0 +1,65 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class ImageTextToVideoTargetSize(BaseInferenceType):
+    """The size in pixels of the output video frames."""
+
+    height: int
+    width: int
+
+
+@dataclass_with_extra
+class ImageTextToVideoParameters(BaseInferenceType):
+    """Additional inference parameters for Image Text To Video"""
+
+    guidance_scale: Optional[float] = None
+    """For diffusion models. A higher guidance scale value encourages the model to generate
+    videos closely linked to the text prompt at the expense of lower image quality.
+    """
+    negative_prompt: Optional[str] = None
+    """One prompt to guide what NOT to include in video generation."""
+    num_frames: Optional[float] = None
+    """The num_frames parameter determines how many video frames are generated."""
+    num_inference_steps: Optional[int] = None
+    """The number of denoising steps. More denoising steps usually lead to a higher quality
+    video at the expense of slower inference.
+    """
+    prompt: Optional[str] = None
+    """The text prompt to guide the video generation. Either this or inputs (image) must be
+    provided.
+    """
+    seed: Optional[int] = None
+    """Seed for the random number generator."""
+    target_size: Optional[ImageTextToVideoTargetSize] = None
+    """The size in pixels of the output video frames."""
+
+
+@dataclass_with_extra
+class ImageTextToVideoInput(BaseInferenceType):
+    """Inputs for Image Text To Video inference. Either inputs (image) or prompt (in parameters)
+    must be provided, or both.
+    """
+
+    inputs: Optional[str] = None
+    """The input image data as a base64-encoded string. If no `parameters` are provided, you can
+    also provide the image data as a raw bytes payload. Either this or prompt must be
+    provided.
+    """
+    parameters: Optional[ImageTextToVideoParameters] = None
+    """Additional inference parameters for Image Text To Video"""
+
+
+@dataclass_with_extra
+class ImageTextToVideoOutput(BaseInferenceType):
+    """Outputs of inference for the Image Text To Video task"""
+
+    video: Any
+    """The generated video returned as raw bytes in the payload."""
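
The video task mirrors the image task, with `num_frames` on the parameters and a `video` output field instead of `image`. A minimal deserialization sketch (assuming the `parse_obj_as_instance` helper that these generated types inherit from `BaseInferenceType`; the dict values are placeholders, not part of the patch):

from huggingface_hub.inference._generated.types import ImageTextToVideoInput

raw = {
    "inputs": "<base64-encoded image>",
    "parameters": {
        "prompt": "The camera slowly pans across a snowy forest",
        "num_frames": 48,
        "num_inference_steps": 25,
        "seed": 0,
    },
}

# Parse the raw dict into the generated dataclasses; fields not present in the
# dict stay at their None defaults, and extra keys are kept thanks to
# `dataclass_with_extra`.
request = ImageTextToVideoInput.parse_obj_as_instance(raw)
print(request.parameters.num_frames)
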
diff --git a/src/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py b/src/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py
index e981463b25..d9512c77fd 100644
--- a/src/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py
+++ b/src/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py
@@ -3,6 +3,7 @@
 # See:
 # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+
 from .base import BaseInferenceType, dataclass_with_extra