diff --git a/docs/source/en/package_reference/inference_types.md b/docs/source/en/package_reference/inference_types.md index 368a716cf3..0e42af9c9f 100644 --- a/docs/source/en/package_reference/inference_types.md +++ b/docs/source/en/package_reference/inference_types.md @@ -65,7 +65,9 @@ This part of the lib is still under development and will be improved in future r [[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions -[[autodoc]] huggingface_hub.ChatCompletionInputToolType +[[autodoc]] huggingface_hub.ChatCompletionInputTool + +[[autodoc]] huggingface_hub.ChatCompletionInputToolChoiceClass [[autodoc]] huggingface_hub.ChatCompletionInputURL @@ -105,8 +107,6 @@ This part of the lib is still under development and will be improved in future r [[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage -[[autodoc]] huggingface_hub.ToolElement - ## depth_estimation diff --git a/docs/source/ko/package_reference/inference_types.md b/docs/source/ko/package_reference/inference_types.md index 6ae2736fba..7c04356455 100644 --- a/docs/source/ko/package_reference/inference_types.md +++ b/docs/source/ko/package_reference/inference_types.md @@ -64,7 +64,9 @@ rendered properly in your Markdown viewer. [[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions -[[autodoc]] huggingface_hub.ChatCompletionInputToolType +[[autodoc]] huggingface_hub.ChatCompletionInputTool + +[[autodoc]] huggingface_hub.ChatCompletionInputToolChoiceClass [[autodoc]] huggingface_hub.ChatCompletionInputURL @@ -104,8 +106,6 @@ rendered properly in your Markdown viewer. 
[[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage -[[autodoc]] huggingface_hub.ToolElement - ## depth_estimation[[huggingface_hub.DepthEstimationInput]] diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index a97263a7f9..a62dfd6c32 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -297,7 +297,9 @@ "ChatCompletionInputMessageChunk", "ChatCompletionInputMessageChunkType", "ChatCompletionInputStreamOptions", - "ChatCompletionInputToolType", + "ChatCompletionInputTool", + "ChatCompletionInputToolChoiceClass", + "ChatCompletionInputToolChoiceEnum", "ChatCompletionInputURL", "ChatCompletionOutput", "ChatCompletionOutputComplete", @@ -400,7 +402,6 @@ "TokenClassificationInput", "TokenClassificationOutputElement", "TokenClassificationParameters", - "ToolElement", "TranslationInput", "TranslationOutput", "TranslationParameters", @@ -827,7 +828,9 @@ def __dir__(): ChatCompletionInputMessageChunk, # noqa: F401 ChatCompletionInputMessageChunkType, # noqa: F401 ChatCompletionInputStreamOptions, # noqa: F401 - ChatCompletionInputToolType, # noqa: F401 + ChatCompletionInputTool, # noqa: F401 + ChatCompletionInputToolChoiceClass, # noqa: F401 + ChatCompletionInputToolChoiceEnum, # noqa: F401 ChatCompletionInputURL, # noqa: F401 ChatCompletionOutput, # noqa: F401 ChatCompletionOutputComplete, # noqa: F401 @@ -930,7 +933,6 @@ def __dir__(): TokenClassificationInput, # noqa: F401 TokenClassificationOutputElement, # noqa: F401 TokenClassificationParameters, # noqa: F401 - ToolElement, # noqa: F401 TranslationInput, # noqa: F401 TranslationOutput, # noqa: F401 TranslationParameters, # noqa: F401 diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py index 287b568380..e252cc7862 100644 --- a/src/huggingface_hub/inference/_client.py +++ b/src/huggingface_hub/inference/_client.py @@ -70,7 +70,9 @@ AutomaticSpeechRecognitionOutput, ChatCompletionInputGrammarType, 
ChatCompletionInputStreamOptions, - ChatCompletionInputToolType, + ChatCompletionInputTool, + ChatCompletionInputToolChoiceClass, + ChatCompletionInputToolChoiceEnum, ChatCompletionOutput, ChatCompletionStreamOutput, DocumentQuestionAnsweringOutputElement, @@ -79,6 +81,7 @@ ImageClassificationOutputTransform, ImageSegmentationOutputElement, ImageSegmentationSubtask, + ImageToImageTargetSize, ImageToTextOutput, ObjectDetectionOutputElement, QuestionAnsweringOutputElement, @@ -94,7 +97,6 @@ TextToSpeechEarlyStoppingEnum, TokenClassificationAggregationStrategy, TokenClassificationOutputElement, - ToolElement, TranslationOutput, TranslationTruncationStrategy, VisualQuestionAnsweringOutputElement, @@ -473,9 +475,9 @@ def chat_completion( # type: ignore stop: Optional[List[str]] = None, stream_options: Optional[ChatCompletionInputStreamOptions] = None, temperature: Optional[float] = None, - tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None, + tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None, tool_prompt: Optional[str] = None, - tools: Optional[List[ToolElement]] = None, + tools: Optional[List[ChatCompletionInputTool]] = None, top_logprobs: Optional[int] = None, top_p: Optional[float] = None, ) -> ChatCompletionOutput: ... @@ -498,9 +500,9 @@ def chat_completion( # type: ignore stop: Optional[List[str]] = None, stream_options: Optional[ChatCompletionInputStreamOptions] = None, temperature: Optional[float] = None, - tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None, + tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None, tool_prompt: Optional[str] = None, - tools: Optional[List[ToolElement]] = None, + tools: Optional[List[ChatCompletionInputTool]] = None, top_logprobs: Optional[int] = None, top_p: Optional[float] = None, ) -> Iterable[ChatCompletionStreamOutput]: ... 
@@ -523,9 +525,9 @@ def chat_completion( stop: Optional[List[str]] = None, stream_options: Optional[ChatCompletionInputStreamOptions] = None, temperature: Optional[float] = None, - tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None, + tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None, tool_prompt: Optional[str] = None, - tools: Optional[List[ToolElement]] = None, + tools: Optional[List[ChatCompletionInputTool]] = None, top_logprobs: Optional[int] = None, top_p: Optional[float] = None, ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ... @@ -548,9 +550,9 @@ def chat_completion( stop: Optional[List[str]] = None, stream_options: Optional[ChatCompletionInputStreamOptions] = None, temperature: Optional[float] = None, - tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None, + tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None, tool_prompt: Optional[str] = None, - tools: Optional[List[ToolElement]] = None, + tools: Optional[List[ChatCompletionInputTool]] = None, top_logprobs: Optional[int] = None, top_p: Optional[float] = None, ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: @@ -616,11 +618,11 @@ def chat_completion( top_p (`float`, *optional*): Fraction of the most likely next words to sample from. Must be between 0 and 1. Defaults to 1.0. - tool_choice ([`ChatCompletionInputToolType`] or `str`, *optional*): + tool_choice ([`ChatCompletionInputToolChoiceClass`] or [`ChatCompletionInputToolChoiceEnum`], *optional*): The tool to use for the completion. Defaults to "auto". tool_prompt (`str`, *optional*): A prompt to be appended before the tools. - tools (List of [`ToolElement`], *optional*): + tools (List of [`ChatCompletionInputTool`], *optional*): A list of tools the model may call. Currently, only functions are supported as a tool. 
Use this to provide a list of functions the model may generate JSON inputs for. @@ -1224,12 +1226,11 @@ def image_to_image( image: ContentT, prompt: Optional[str] = None, *, - negative_prompt: Optional[str] = None, - height: Optional[int] = None, - width: Optional[int] = None, + negative_prompt: Optional[List[str]] = None, num_inference_steps: Optional[int] = None, guidance_scale: Optional[float] = None, model: Optional[str] = None, + target_size: Optional[ImageToImageTargetSize] = None, **kwargs, ) -> "Image": """ @@ -1246,21 +1247,19 @@ def image_to_image( The input image for translation. It can be raw bytes, an image file, or a URL to an online image. prompt (`str`, *optional*): The text prompt to guide the image generation. - negative_prompt (`str`, *optional*): - A negative prompt to guide the translation process. - height (`int`, *optional*): - The height in pixels of the generated image. - width (`int`, *optional*): - The width in pixels of the generated image. + negative_prompt (`List[str]`, *optional*): + One or several prompts to guide what NOT to include in image generation. num_inference_steps (`int`, *optional*): - The number of denoising steps. More denoising steps usually lead to a higher quality image at the - expense of slower inference. + For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher + quality image at the expense of slower inference. guidance_scale (`float`, *optional*): - Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, - usually at the expense of lower image quality. + For diffusion models. A higher guidance scale value encourages the model to generate images closely + linked to the text prompt at the expense of lower image quality. model (`str`, *optional*): The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint. This parameter overrides the model defined at the instance level.
Defaults to None. + target_size (`ImageToImageTargetSize`, *optional*): + The size in pixels of the output image. Returns: `Image`: The translated image. @@ -1282,8 +1281,7 @@ def image_to_image( parameters = { "prompt": prompt, "negative_prompt": negative_prompt, - "height": height, - "width": width, + "target_size": target_size, "num_inference_steps": num_inference_steps, "guidance_scale": guidance_scale, **kwargs, @@ -2469,21 +2467,13 @@ def text_to_speech( Defaults to None. do_sample (`bool`, *optional*): Whether to use sampling instead of greedy decoding when generating new tokens. - early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"`, *optional*): + early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"]`, *optional*): Controls the stopping condition for beam-based methods. epsilon_cutoff (`float`, *optional*): If set to float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details. - eta_cutoff (`float`, *optional*): - Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly - between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) - * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token - probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, - depending on the size of the model. See [Truncation Sampling as Language Model - Desmoothing](https://hf.co/papers/2210.15191) for more details. - float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff (`float`, *optional*): Eta sampling is a hybrid of locally typical sampling and epsilon sampling.
If set to float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py index 57ff5a1d49..6d3d278594 100644 --- a/src/huggingface_hub/inference/_generated/_async_client.py +++ b/src/huggingface_hub/inference/_generated/_async_client.py @@ -56,7 +56,9 @@ AutomaticSpeechRecognitionOutput, ChatCompletionInputGrammarType, ChatCompletionInputStreamOptions, - ChatCompletionInputToolType, + ChatCompletionInputTool, + ChatCompletionInputToolChoiceClass, + ChatCompletionInputToolChoiceEnum, ChatCompletionOutput, ChatCompletionStreamOutput, DocumentQuestionAnsweringOutputElement, @@ -65,6 +67,7 @@ ImageClassificationOutputTransform, ImageSegmentationOutputElement, ImageSegmentationSubtask, + ImageToImageTargetSize, ImageToTextOutput, ObjectDetectionOutputElement, QuestionAnsweringOutputElement, @@ -80,7 +83,6 @@ TextToSpeechEarlyStoppingEnum, TokenClassificationAggregationStrategy, TokenClassificationOutputElement, - ToolElement, TranslationOutput, TranslationTruncationStrategy, VisualQuestionAnsweringOutputElement, @@ -509,9 +511,9 @@ async def chat_completion( # type: ignore stop: Optional[List[str]] = None, stream_options: Optional[ChatCompletionInputStreamOptions] = None, temperature: Optional[float] = None, - tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None, + tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None, tool_prompt: Optional[str] = None, - tools: Optional[List[ToolElement]] = None, + tools: Optional[List[ChatCompletionInputTool]] = None, top_logprobs: Optional[int] = None, top_p: Optional[float] = None, ) -> ChatCompletionOutput: ... 
@@ -534,9 +536,9 @@ async def chat_completion( # type: ignore stop: Optional[List[str]] = None, stream_options: Optional[ChatCompletionInputStreamOptions] = None, temperature: Optional[float] = None, - tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None, + tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None, tool_prompt: Optional[str] = None, - tools: Optional[List[ToolElement]] = None, + tools: Optional[List[ChatCompletionInputTool]] = None, top_logprobs: Optional[int] = None, top_p: Optional[float] = None, ) -> AsyncIterable[ChatCompletionStreamOutput]: ... @@ -559,9 +561,9 @@ async def chat_completion( stop: Optional[List[str]] = None, stream_options: Optional[ChatCompletionInputStreamOptions] = None, temperature: Optional[float] = None, - tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None, + tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None, tool_prompt: Optional[str] = None, - tools: Optional[List[ToolElement]] = None, + tools: Optional[List[ChatCompletionInputTool]] = None, top_logprobs: Optional[int] = None, top_p: Optional[float] = None, ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ... 
@@ -584,9 +586,9 @@ async def chat_completion( stop: Optional[List[str]] = None, stream_options: Optional[ChatCompletionInputStreamOptions] = None, temperature: Optional[float] = None, - tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None, + tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None, tool_prompt: Optional[str] = None, - tools: Optional[List[ToolElement]] = None, + tools: Optional[List[ChatCompletionInputTool]] = None, top_logprobs: Optional[int] = None, top_p: Optional[float] = None, ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: @@ -652,11 +654,11 @@ async def chat_completion( top_p (`float`, *optional*): Fraction of the most likely next words to sample from. Must be between 0 and 1. Defaults to 1.0. - tool_choice ([`ChatCompletionInputToolType`] or `str`, *optional*): + tool_choice ([`ChatCompletionInputToolChoiceClass`] or [`ChatCompletionInputToolChoiceEnum`], *optional*): The tool to use for the completion. Defaults to "auto". tool_prompt (`str`, *optional*): A prompt to be appended before the tools. - tools (List of [`ToolElement`], *optional*): + tools (List of [`ChatCompletionInputTool`], *optional*): A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. @@ -1271,12 +1273,11 @@ async def image_to_image( image: ContentT, prompt: Optional[str] = None, *, - negative_prompt: Optional[str] = None, - height: Optional[int] = None, - width: Optional[int] = None, + negative_prompt: Optional[List[str]] = None, num_inference_steps: Optional[int] = None, guidance_scale: Optional[float] = None, model: Optional[str] = None, + target_size: Optional[ImageToImageTargetSize] = None, **kwargs, ) -> "Image": """ @@ -1293,21 +1294,19 @@ async def image_to_image( The input image for translation. 
It can be raw bytes, an image file, or a URL to an online image. prompt (`str`, *optional*): The text prompt to guide the image generation. - negative_prompt (`str`, *optional*): - A negative prompt to guide the translation process. - height (`int`, *optional*): - The height in pixels of the generated image. - width (`int`, *optional*): - The width in pixels of the generated image. + negative_prompt (`List[str]`, *optional*): + One or several prompts to guide what NOT to include in image generation. num_inference_steps (`int`, *optional*): - The number of denoising steps. More denoising steps usually lead to a higher quality image at the - expense of slower inference. + For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher + quality image at the expense of slower inference. guidance_scale (`float`, *optional*): - Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, - usually at the expense of lower image quality. + For diffusion models. A higher guidance scale value encourages the model to generate images closely + linked to the text prompt at the expense of lower image quality. model (`str`, *optional*): The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None. + target_size (`ImageToImageTargetSize`, *optional*): + The size in pixels of the output image. Returns: `Image`: The translated image. @@ -1330,8 +1329,7 @@ async def image_to_image( parameters = { "prompt": prompt, "negative_prompt": negative_prompt, - "height": height, - "width": width, + "target_size": target_size, "num_inference_steps": num_inference_steps, "guidance_scale": guidance_scale, **kwargs, @@ -2534,21 +2532,13 @@ async def text_to_speech( Defaults to None.
do_sample (`bool`, *optional*): Whether to use sampling instead of greedy decoding when generating new tokens. - early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"`, *optional*): + early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"]`, *optional*): Controls the stopping condition for beam-based methods. epsilon_cutoff (`float`, *optional*): If set to float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details. - eta_cutoff (`float`, *optional*): - Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly - between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) - * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token - probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, - depending on the size of the model. See [Truncation Sampling as Language Model - Desmoothing](https://hf.co/papers/2210.15191) for more details. - float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff (`float`, *optional*): Eta sampling is a hybrid of locally typical sampling and epsilon sampling. 
If set to float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) diff --git a/src/huggingface_hub/inference/_generated/types/__init__.py b/src/huggingface_hub/inference/_generated/types/__init__.py index d59bae0ba3..a8b34c467b 100644 --- a/src/huggingface_hub/inference/_generated/types/__init__.py +++ b/src/huggingface_hub/inference/_generated/types/__init__.py @@ -29,7 +29,9 @@ ChatCompletionInputMessageChunk, ChatCompletionInputMessageChunkType, ChatCompletionInputStreamOptions, - ChatCompletionInputToolType, + ChatCompletionInputTool, + ChatCompletionInputToolChoiceClass, + ChatCompletionInputToolChoiceEnum, ChatCompletionInputURL, ChatCompletionOutput, ChatCompletionOutputComplete, @@ -49,7 +51,6 @@ ChatCompletionStreamOutputLogprobs, ChatCompletionStreamOutputTopLogprob, ChatCompletionStreamOutputUsage, - ToolElement, ) from .depth_estimation import DepthEstimationInput, DepthEstimationOutput from .document_question_answering import ( diff --git a/src/huggingface_hub/inference/_generated/types/chat_completion.py b/src/huggingface_hub/inference/_generated/types/chat_completion.py index 7a1f297e4f..8caec602b0 100644 --- a/src/huggingface_hub/inference/_generated/types/chat_completion.py +++ b/src/huggingface_hub/inference/_generated/types/chat_completion.py @@ -60,8 +60,11 @@ class ChatCompletionInputFunctionName(BaseInferenceType): @dataclass -class ChatCompletionInputToolType(BaseInferenceType): - function: Optional[ChatCompletionInputFunctionName] = None +class ChatCompletionInputToolChoiceClass(BaseInferenceType): + function: ChatCompletionInputFunctionName + + +ChatCompletionInputToolChoiceEnum = Literal["auto", "none", "required"] @dataclass @@ -72,7 +75,7 @@ class ChatCompletionInputFunctionDefinition(BaseInferenceType): @dataclass -class ToolElement(BaseInferenceType): +class ChatCompletionInputTool(BaseInferenceType): function: ChatCompletionInputFunctionDefinition type: str @@ -138,10 
+141,10 @@ class ChatCompletionInput(BaseInferenceType): lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. """ - tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None + tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None tool_prompt: Optional[str] = None """A prompt to be appended before the tools""" - tools: Optional[List[ToolElement]] = None + tools: Optional[List[ChatCompletionInputTool]] = None """A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. diff --git a/utils/check_task_parameters.py b/utils/check_task_parameters.py index f0c45d9718..ea2da92f9e 100644 --- a/utils/check_task_parameters.py +++ b/utils/check_task_parameters.py @@ -71,7 +71,6 @@ "table_question_answering", "automatic_speech_recognition", "image_to_text", - "image_to_image", ] PARAMETERS_DATACLASS_REGEX = re.compile( @@ -312,7 +311,7 @@ def _update_parameters(self, params: cst.Parameters) -> cst.Parameters: new_param = cst.Param( name=cst.Name(param_name), annotation=annotation, - default=param_info["default_value"], + default=cst.Name(param_info["default_value"]), ) new_kwonly_params.append(new_param) # Return the updated parameters object with new and updated parameters