Skip to content

Commit

Permalink
[Bot] Update inference types (#2664)
Browse files (browse the repository at this point in the history)
* Update inference types (automated commit)

* fix quality after merging main

* another fix

* fix tests

* Update inference types (automated commit)

* Update inference types (automated commit)

* fix quality

* Update inference types (automated commit)

* Update inference types (automated commit)

---------

Co-authored-by: Wauplin <11801849+Wauplin@users.noreply.github.com>
Co-authored-by: Celina Hanouti <hanouticelina@gmail.com>
  • Loading branch information
3 people authored Nov 28, 2024
1 parent 446e9c1 commit 503d353
Show file tree
Hide file tree
Showing 34 changed files with 138 additions and 222 deletions.
8 changes: 2 additions & 6 deletions docs/source/en/package_reference/inference_types.md
Original file line number Diff line number Diff line change
Expand Up @@ -369,8 +369,6 @@ This part of the lib is still under development and will be improved in future r

[[autodoc]] huggingface_hub.ZeroShotClassificationInput

[[autodoc]] huggingface_hub.ZeroShotClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotClassificationParameters
Expand All @@ -381,8 +379,6 @@ This part of the lib is still under development and will be improved in future r

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInput

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotImageClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotImageClassificationParameters
Expand All @@ -395,6 +391,6 @@ This part of the lib is still under development and will be improved in future r

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInput

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInputData

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionOutputElement

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionParameters
8 changes: 2 additions & 6 deletions docs/source/ko/package_reference/inference_types.md
Original file line number Diff line number Diff line change
Expand Up @@ -368,8 +368,6 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ZeroShotClassificationInput

[[autodoc]] huggingface_hub.ZeroShotClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotClassificationParameters
Expand All @@ -380,8 +378,6 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInput

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotImageClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotImageClassificationParameters
Expand All @@ -394,6 +390,6 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInput

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInputData

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionOutputElement

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionParameters
8 changes: 2 additions & 6 deletions src/huggingface_hub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,17 +416,15 @@
"VisualQuestionAnsweringOutputElement",
"VisualQuestionAnsweringParameters",
"ZeroShotClassificationInput",
"ZeroShotClassificationInputData",
"ZeroShotClassificationOutputElement",
"ZeroShotClassificationParameters",
"ZeroShotImageClassificationInput",
"ZeroShotImageClassificationInputData",
"ZeroShotImageClassificationOutputElement",
"ZeroShotImageClassificationParameters",
"ZeroShotObjectDetectionBoundingBox",
"ZeroShotObjectDetectionInput",
"ZeroShotObjectDetectionInputData",
"ZeroShotObjectDetectionOutputElement",
"ZeroShotObjectDetectionParameters",
],
"inference_api": [
"InferenceApi",
Expand Down Expand Up @@ -947,17 +945,15 @@ def __dir__():
VisualQuestionAnsweringOutputElement, # noqa: F401
VisualQuestionAnsweringParameters, # noqa: F401
ZeroShotClassificationInput, # noqa: F401
ZeroShotClassificationInputData, # noqa: F401
ZeroShotClassificationOutputElement, # noqa: F401
ZeroShotClassificationParameters, # noqa: F401
ZeroShotImageClassificationInput, # noqa: F401
ZeroShotImageClassificationInputData, # noqa: F401
ZeroShotImageClassificationOutputElement, # noqa: F401
ZeroShotImageClassificationParameters, # noqa: F401
ZeroShotObjectDetectionBoundingBox, # noqa: F401
ZeroShotObjectDetectionInput, # noqa: F401
ZeroShotObjectDetectionInputData, # noqa: F401
ZeroShotObjectDetectionOutputElement, # noqa: F401
ZeroShotObjectDetectionParameters, # noqa: F401
)
from .inference_api import InferenceApi # noqa: F401
from .keras_mixin import (
Expand Down
25 changes: 13 additions & 12 deletions src/huggingface_hub/inference/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ def audio_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
Returns:
`List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
Expand Down Expand Up @@ -982,7 +982,7 @@ def document_question_answering(
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> client.document_question_answering(image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", question="What is the invoice number?")
[DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16, words=None)]
[DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
```
"""
inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
Expand Down Expand Up @@ -1133,7 +1133,7 @@ def image_classification(
The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
Returns:
Expand Down Expand Up @@ -1812,7 +1812,7 @@ def text_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
Returns:
`List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
Expand Down Expand Up @@ -2484,11 +2484,11 @@ def text_to_speech(
max_length (`int`, *optional*):
The maximum length (in tokens) of the generated text, including the input.
max_new_tokens (`int`, *optional*):
The maximum number of tokens to generate. Takes precedence over maxLength.
The maximum number of tokens to generate. Takes precedence over max_length.
min_length (`int`, *optional*):
The minimum length (in tokens) of the generated text, including the input.
min_new_tokens (`int`, *optional*):
The minimum number of tokens to generate. Takes precedence over maxLength.
The minimum number of tokens to generate. Takes precedence over min_length.
num_beam_groups (`int`, *optional*):
Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
See [this paper](https://hf.co/papers/1610.02424) for more details.
Expand Down Expand Up @@ -2791,12 +2791,13 @@ def zero_shot_classification(
the label likelihoods for each sequence is 1. If true, the labels are considered independent and
probabilities are normalized for each candidate.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the text classification by
replacing the placeholder with the candidate labels.
model (`str`, *optional*):
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.
Returns:
`List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
Expand Down Expand Up @@ -2887,12 +2888,12 @@ def zero_shot_image_classification(
self,
image: ContentT,
# temporarily keeping it optional for backward compatibility.
candidate_labels: Optional[List[str]] = None,
candidate_labels: List[str] = None, # type: ignore
*,
model: Optional[str] = None,
hypothesis_template: Optional[str] = None,
# deprecated argument
labels: Optional[List[str]] = None, # type: ignore
labels: List[str] = None, # type: ignore
) -> List[ZeroShotImageClassificationOutputElement]:
"""
Provide input image and text labels to predict text labels for the image.
Expand All @@ -2908,8 +2909,8 @@ def zero_shot_image_classification(
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the image classification by
replacing the placeholder with the candidate labels.
Returns:
`List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
Expand Down
25 changes: 13 additions & 12 deletions src/huggingface_hub/inference/_generated/_async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ async def audio_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
Returns:
`List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
Expand Down Expand Up @@ -1025,7 +1025,7 @@ async def document_question_answering(
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()
>>> await client.document_question_answering(image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", question="What is the invoice number?")
[DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16, words=None)]
[DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
```
"""
inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
Expand Down Expand Up @@ -1178,7 +1178,7 @@ async def image_classification(
The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
Returns:
Expand Down Expand Up @@ -1874,7 +1874,7 @@ async def text_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
Returns:
`List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
Expand Down Expand Up @@ -2549,11 +2549,11 @@ async def text_to_speech(
max_length (`int`, *optional*):
The maximum length (in tokens) of the generated text, including the input.
max_new_tokens (`int`, *optional*):
The maximum number of tokens to generate. Takes precedence over maxLength.
The maximum number of tokens to generate. Takes precedence over max_length.
min_length (`int`, *optional*):
The minimum length (in tokens) of the generated text, including the input.
min_new_tokens (`int`, *optional*):
The minimum number of tokens to generate. Takes precedence over maxLength.
The minimum number of tokens to generate. Takes precedence over min_length.
num_beam_groups (`int`, *optional*):
Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
See [this paper](https://hf.co/papers/1610.02424) for more details.
Expand Down Expand Up @@ -2860,12 +2860,13 @@ async def zero_shot_classification(
the label likelihoods for each sequence is 1. If true, the labels are considered independent and
probabilities are normalized for each candidate.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the text classification by
replacing the placeholder with the candidate labels.
model (`str`, *optional*):
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.
Returns:
`List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
Expand Down Expand Up @@ -2958,12 +2959,12 @@ async def zero_shot_image_classification(
self,
image: ContentT,
# temporarily keeping it optional for backward compatibility.
candidate_labels: Optional[List[str]] = None,
candidate_labels: List[str] = None, # type: ignore
*,
model: Optional[str] = None,
hypothesis_template: Optional[str] = None,
# deprecated argument
labels: Optional[List[str]] = None, # type: ignore
labels: List[str] = None, # type: ignore
) -> List[ZeroShotImageClassificationOutputElement]:
"""
Provide input image and text labels to predict text labels for the image.
Expand All @@ -2979,8 +2980,8 @@ async def zero_shot_image_classification(
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the image classification by
replacing the placeholder with the candidate labels.
Returns:
`List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
Expand Down
4 changes: 1 addition & 3 deletions src/huggingface_hub/inference/_generated/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,19 +168,17 @@
)
from .zero_shot_classification import (
ZeroShotClassificationInput,
ZeroShotClassificationInputData,
ZeroShotClassificationOutputElement,
ZeroShotClassificationParameters,
)
from .zero_shot_image_classification import (
ZeroShotImageClassificationInput,
ZeroShotImageClassificationInputData,
ZeroShotImageClassificationOutputElement,
ZeroShotImageClassificationParameters,
)
from .zero_shot_object_detection import (
ZeroShotObjectDetectionBoundingBox,
ZeroShotObjectDetectionInput,
ZeroShotObjectDetectionInputData,
ZeroShotObjectDetectionOutputElement,
ZeroShotObjectDetectionParameters,
)
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@

@dataclass
class AudioClassificationParameters(BaseInferenceType):
"""Additional inference parameters
Additional inference parameters for Audio Classification
"""
"""Additional inference parameters for Audio Classification"""

function_to_apply: Optional["AudioClassificationOutputTransform"] = None
"""The function to apply to the output."""
"""The function to apply to the model outputs in order to retrieve the scores."""
top_k: Optional[int] = None
"""When specified, limits the output to the top K most probable classes."""

Expand All @@ -33,7 +31,7 @@ class AudioClassificationInput(BaseInferenceType):
also provide the audio data as a raw bytes payload.
"""
parameters: Optional[AudioClassificationParameters] = None
"""Additional inference parameters"""
"""Additional inference parameters for Audio Classification"""


@dataclass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@

@dataclass
class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
"""Parametrization of the text generation process
Ad-hoc parametrization of the text generation process
"""
"""Parametrization of the text generation process"""

do_sample: Optional[bool] = None
"""Whether to use sampling instead of greedy decoding when generating new tokens."""
Expand Down Expand Up @@ -76,11 +74,9 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):

@dataclass
class AutomaticSpeechRecognitionParameters(BaseInferenceType):
"""Additional inference parameters
Additional inference parameters for Automatic Speech Recognition
"""
"""Additional inference parameters for Automatic Speech Recognition"""

generate: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
"""Parametrization of the text generation process"""
return_timestamps: Optional[bool] = None
"""Whether to output corresponding timestamps with the generated text"""
Expand All @@ -95,7 +91,7 @@ class AutomaticSpeechRecognitionInput(BaseInferenceType):
also provide the audio data as a raw bytes payload.
"""
parameters: Optional[AutomaticSpeechRecognitionParameters] = None
"""Additional inference parameters"""
"""Additional inference parameters for Automatic Speech Recognition"""


@dataclass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class DepthEstimationInput(BaseInferenceType):
inputs: Any
"""The input image data"""
parameters: Optional[Dict[str, Any]] = None
"""Additional inference parameters"""
"""Additional inference parameters for Depth Estimation"""


@dataclass
Expand Down
Loading

0 comments on commit 503d353

Please sign in to comment.