From 40cf9317b19e4b83a1fff6b8799ea6bbb6d2b5e3 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Mon, 27 Oct 2025 10:42:48 +0100 Subject: [PATCH 1/6] Change use_file_api to file_mode --- .../llama_index/llms/google_genai/base.py | 49 +++++++++---------- .../llama_index/llms/google_genai/utils.py | 46 ++++++++++------- .../pyproject.toml | 2 +- .../llama-index-llms-google-genai/uv.lock | 4 +- 4 files changed, 54 insertions(+), 47 deletions(-) diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py index 69e5ad4055..84a45e5b60 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py @@ -16,6 +16,7 @@ Type, Union, Callable, + Literal, ) @@ -139,9 +140,9 @@ class GoogleGenAI(FunctionCallingLLM): default=None, description="Google GenAI tool to use for the model to augment responses.", ) - use_file_api: bool = Field( - default=True, - description="Whether or not to use the FileAPI for large files (>20MB).", + file_mode: Literal["inline", "fileapi", "hybrid"] = Field( + default="hybrid", + description="Whether to use inline-only, FileAPI-only or both for handling files.", ) _max_tokens: int = PrivateAttr() @@ -165,7 +166,7 @@ def __init__( is_function_calling_model: bool = True, cached_content: Optional[str] = None, built_in_tool: Optional[types.Tool] = None, - use_file_api: bool = True, + file_mode: Literal["inline", "fileapi", "hybrid"] = "hybrid", **kwargs: Any, ): # API keys are optional. The API can be authorised via OAuth (detected @@ -214,7 +215,7 @@ def __init__( max_retries=max_retries, cached_content=cached_content, built_in_tool=built_in_tool, - use_file_api=use_file_api, + file_mode=file_mode, **kwargs, ) @@ -309,7 +310,7 @@ def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any): params = {**kwargs, "generation_config": generation_config} next_msg, chat_kwargs = asyncio.run( prepare_chat_params( - self.model, messages, self.use_file_api, self._client, **params + self.model, messages, self.file_mode, self._client, **params ) ) chat = self._client.chats.create(**chat_kwargs) @@ -317,7 +318,7 @@ def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any): next_msg.parts if isinstance(next_msg, types.Content) else next_msg ) - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): asyncio.run( delete_uploaded_files([*chat_kwargs["history"], next_msg], self._client) ) @@ -332,14 +333,14 @@ async def _achat(self, messages: Sequence[ChatMessage], **kwargs: Any): } params = {**kwargs, "generation_config": generation_config} next_msg, chat_kwargs = await prepare_chat_params( - self.model, messages, self.use_file_api, self._client, **params + self.model, messages, self.file_mode, self._client, **params ) chat = self._client.aio.chats.create(**chat_kwargs) response = await chat.send_message( next_msg.parts if isinstance(next_msg, types.Content) else next_msg ) - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): await delete_uploaded_files( [*chat_kwargs["history"], next_msg], self._client ) @@ -366,7 +367,7 @@ def _stream_chat( params = {**kwargs, "generation_config": generation_config} next_msg, chat_kwargs = asyncio.run( prepare_chat_params( - self.model, messages, self.use_file_api, self._client, **params + self.model, messages, 
self.file_mode, self._client, **params ) ) chat = self._client.chats.create(**chat_kwargs) @@ -399,7 +400,7 @@ def gen() -> ChatResponseGen: llama_resp.message.additional_kwargs["tool_calls"] = existing_tool_calls yield llama_resp - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): asyncio.run( delete_uploaded_files( [*chat_kwargs["history"], next_msg], self._client @@ -423,7 +424,7 @@ async def _astream_chat( } params = {**kwargs, "generation_config": generation_config} next_msg, chat_kwargs = await prepare_chat_params( - self.model, messages, self.use_file_api, self._client, **params + self.model, messages, self.file_mode, self._client, **params ) chat = self._client.aio.chats.create(**chat_kwargs) @@ -463,7 +464,7 @@ async def gen() -> ChatResponseAsyncGen: ) yield llama_resp - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): await delete_uploaded_files( [*chat_kwargs["history"], next_msg], self._client ) @@ -586,9 +587,7 @@ def structured_predict_without_function_calling( messages = prompt.format_messages(**prompt_args) contents = [ - asyncio.run( - chat_message_to_gemini(message, self.use_file_api, self._client) - ) + asyncio.run(chat_message_to_gemini(message, self.file_mode, self._client)) for message in messages ] response = self._client.models.generate_content( @@ -605,7 +604,7 @@ def structured_predict_without_function_calling( }, ) - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): asyncio.run(delete_uploaded_files(contents, self._client)) if isinstance(response.parsed, BaseModel): @@ -637,7 +636,7 @@ def structured_predict( messages = prompt.format_messages(**prompt_args) contents = [ asyncio.run( - chat_message_to_gemini(message, self.use_file_api, self._client) + chat_message_to_gemini(message, self.file_mode, self._client) ) for message in messages ] @@ -647,7 +646,7 @@ def structured_predict( config=generation_config, ) - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): asyncio.run(delete_uploaded_files(contents, self._client)) if isinstance(response.parsed, BaseModel): @@ -684,7 +683,7 @@ async def astructured_predict( messages = prompt.format_messages(**prompt_args) contents = await asyncio.gather( *[ - chat_message_to_gemini(message, self.use_file_api, self._client) + chat_message_to_gemini(message, self.file_mode, self._client) for message in messages ] ) @@ -694,7 +693,7 @@ async def astructured_predict( config=generation_config, ) - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): await delete_uploaded_files(contents, self._client) if isinstance(response.parsed, BaseModel): @@ -731,7 +730,7 @@ def stream_structured_predict( messages = prompt.format_messages(**prompt_args) contents = [ asyncio.run( - chat_message_to_gemini(message, self.use_file_api, self._client) + chat_message_to_gemini(message, self.file_mode, self._client) ) for message in messages ] @@ -758,7 +757,7 @@ def gen() -> Generator[Union[Model, FlexibleModel], None, None]: if streaming_model: yield streaming_model - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): asyncio.run(delete_uploaded_files(contents, self._client)) return gen() @@ -791,7 +790,7 @@ async def astream_structured_predict( messages = prompt.format_messages(**prompt_args) contents = await asyncio.gather( *[ - chat_message_to_gemini(message, self.use_file_api, self._client) + chat_message_to_gemini(message, self.file_mode, self._client) for message in messages ] ) @@ -818,7 +817,7 @@ async def gen() -> 
AsyncGenerator[Union[Model, FlexibleModel], None]: if streaming_model: yield streaming_model - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): await delete_uploaded_files(contents, self._client) return gen() diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py index fe02a5a69b..6f32d77300 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py @@ -10,6 +10,7 @@ Optional, Type, Tuple, + Literal, ) import typing @@ -222,25 +223,30 @@ def chat_from_gemini_response( async def create_file_part( - file_bytes: bytes, mime_type: str, use_file_api: bool, client: Optional[Client] + file_buffer: BytesIO, + mime_type: str, + file_mode: Literal["inline", "fileapi", "hybrid"], + client: Optional[Client], ) -> types.PartUnion: """Create a Part or File object for the given file depending on its size.""" - if ( - not use_file_api - or len(file_bytes) - < 20 * 1024 * 1024 # 20MB is the Gemini inline data size limit - ): - return types.Part.from_bytes( - data=file_bytes, - mime_type=mime_type, - ) + if file_mode in ("inline", "hybrid"): + file_buffer.seek(0, 2) # Seek to end + size = file_buffer.tell() # Get file size + file_buffer.seek(0) # Reset to beginning + + if size < 20 * 1024 * 1024: # 20MB is the Gemini inline data size limit + return types.Part.from_bytes( + data=file_buffer.read(), + mime_type=mime_type, + ) + elif file_mode == "inline": + raise ValueError("Files in inline mode must be smaller than 20MB.") if client is None: raise ValueError("A Google GenAI client must be provided for use with FileAPI.") - buffer = BytesIO(file_bytes) file = await client.aio.files.upload( - file=buffer, config=types.UploadFileConfig(mime_type=mime_type) + file=file_buffer, config=types.UploadFileConfig(mime_type=mime_type) ) # Wait for file processing @@ -268,7 +274,9 @@ async def delete_uploaded_files( async def chat_message_to_gemini( - message: ChatMessage, use_file_api: bool = False, client: Optional[Client] = None + message: ChatMessage, + file_mode: Literal["inline", "fileapi", "hybrid"] = "hybrid", + client: Optional[Client] = None, ) -> Union[types.Content, types.File]: """Convert ChatMessages to Gemini-specific history, including ImageDocuments.""" parts = [] @@ -278,7 +286,7 @@ async def chat_message_to_gemini( if block.text: part = types.Part.from_text(text=block.text) elif isinstance(block, ImageBlock): - file_bytes = block.resolve_image(as_base64=False).read() + file_buffer = block.resolve_image(as_base64=False) mime_type = ( block.image_mimetype @@ -286,13 +294,12 @@ async def chat_message_to_gemini( else "image/jpeg" # TODO: Fail? ) - part = await create_file_part(file_bytes, mime_type, use_file_api, client) + part = await create_file_part(file_buffer, mime_type, file_mode, client) if isinstance(part, types.File): return part # Return the file as it is a message content and not a part elif isinstance(block, VideoBlock): file_buffer = block.resolve_video(as_base64=False) - file_bytes = file_buffer.read() mime_type = ( block.video_mimetype @@ -300,7 +307,7 @@ async def chat_message_to_gemini( else "video/mp4" # TODO: Fail? 
) - part = await create_file_part(file_bytes, mime_type, use_file_api, client) + part = await create_file_part(file_buffer, mime_type, file_mode, client) if isinstance(part, types.File): return part # Return the file as it is a message content and not a part @@ -309,13 +316,14 @@ async def chat_message_to_gemini( elif isinstance(block, DocumentBlock): file_buffer = block.resolve_document() - file_bytes = file_buffer.read() + mime_type = ( block.document_mimetype if block.document_mimetype is not None else "application/pdf" ) - part = await create_file_part(file_bytes, mime_type, use_file_api, client) + + part = await create_file_part(file_buffer, mime_type, file_mode, client) if isinstance(part, types.File): return part # Return the file as it is a message content and not a part diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-google-genai/pyproject.toml index 181ceda88c..5c333fa164 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/pyproject.toml @@ -27,7 +27,7 @@ dev = [ [project] name = "llama-index-llms-google-genai" -version = "0.6.2" +version = "0.7.0" description = "llama-index llms google genai integration" authors = [{name = "Your Name", email = "you@example.com"}] requires-python = ">=3.9,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/uv.lock b/llama-index-integrations/llms/llama-index-llms-google-genai/uv.lock index f236f5cb34..6c94ce251b 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/uv.lock +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.9, <4.0" resolution-markers = [ "python_full_version >= '3.11'", @@ -1706,7 +1706,7 @@ wheels = [ [[package]] name = "llama-index-llms-google-genai" -version = "0.6.2" +version = "0.7.0" source = { editable = "." 
} dependencies = [ { name = "google-genai" }, From 597111b2e769fbe458f640e4d0ea928d653fc8f7 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Mon, 27 Oct 2025 10:59:23 +0100 Subject: [PATCH 2/6] Fix lint --- CHANGELOG.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index af76f376c0..3335d5bf0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,52 +5,67 @@ ## [2025-10-26] ### llama-index-core [0.14.6] + - Add allow_parallel_tool_calls for non-streaming ([#20117](https://github.com/run-llama/llama_index/pull/20117)) - Fix invalid use of field-specific metadata ([#20122](https://github.com/run-llama/llama_index/pull/20122)) - update doc for SemanticSplitterNodeParser ([#20125](https://github.com/run-llama/llama_index/pull/20125)) - fix rare cases when sentence splits are larger than chunk size ([#20147](https://github.com/run-llama/llama_index/pull/20147)) ### llama-index-embeddings-bedrock [0.7.0] + - Fix BedrockEmbedding to support Cohere v4 response format ([#20094](https://github.com/run-llama/llama_index/pull/20094)) ### llama-index-embeddings-isaacus [0.1.0] + - feat: Isaacus embeddings integration ([#20124](https://github.com/run-llama/llama_index/pull/20124)) ### llama-index-embeddings-oci-genai [0.4.2] + - Update OCI GenAI cohere models ([#20146](https://github.com/run-llama/llama_index/pull/20146)) ### llama-index-llms-anthropic [0.9.7] + - Fix double token stream in anthropic llm ([#20108](https://github.com/run-llama/llama_index/pull/20108)) - Ensure anthropic content delta only has user facing response ([#20113](https://github.com/run-llama/llama_index/pull/20113)) ### llama-index-llms-baseten [0.1.7] + - add GLM ([#20121](https://github.com/run-llama/llama_index/pull/20121)) ### llama-index-llms-helicone [0.1.0] + - integrate helicone to llama-index ([#20131](https://github.com/run-llama/llama_index/pull/20131)) ### llama-index-llms-oci-genai [0.6.4] + - Update OCI GenAI cohere models ([#20146](https://github.com/run-llama/llama_index/pull/20146)) ### llama-index-llms-openai [0.6.5] + - chore: openai vbump ([#20095](https://github.com/run-llama/llama_index/pull/20095)) ### llama-index-readers-imdb-review [0.4.2] + - chore: Update selenium dependency in imdb-review reader ([#20105](https://github.com/run-llama/llama_index/pull/20105)) ### llama-index-retrievers-bedrock [0.5.0] + - feat(bedrock): add async support for AmazonKnowledgeBasesRetriever ([#20114](https://github.com/run-llama/llama_index/pull/20114)) ### llama-index-retrievers-superlinked [0.1.3] + - Update README.md ([#19829](https://github.com/run-llama/llama_index/pull/19829)) ### llama-index-storage-kvstore-postgres [0.4.2] + - fix: Replace raw SQL string interpolation with proper SQLAlchemy parameterized APIs in PostgresKVStore ([#20104](https://github.com/run-llama/llama_index/pull/20104)) ### llama-index-tools-mcp [0.4.3] + - Fix BasicMCPClient resource signatures ([#20118](https://github.com/run-llama/llama_index/pull/20118)) ### llama-index-vector-stores-postgres [0.7.1] + - Add GIN index support for text array metadata in PostgreSQL vector store ([#20130](https://github.com/run-llama/llama_index/pull/20130)) ## [2025-10-15] From 41b9ec4be6d5c7ee41a0d8394495e4add0037619 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Mon, 27 Oct 2025 14:35:12 +0100 Subject: [PATCH 3/6] Update file_buffer type --- .../llama_index/llms/google_genai/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py index 6f32d77300..1635525b89 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py @@ -1,7 +1,7 @@ import asyncio import logging from collections.abc import Sequence -from io import BytesIO +from io import BufferedIOBase from typing import ( TYPE_CHECKING, Any, @@ -223,7 +223,7 @@ def chat_from_gemini_response( async def create_file_part( - file_buffer: BytesIO, + file_buffer: BufferedIOBase, mime_type: str, file_mode: Literal["inline", "fileapi", "hybrid"], client: Optional[Client], From d733d203cc6d2e4c166cea4eeee9e9c0bf8999f3 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Mon, 27 Oct 2025 15:29:52 +0100 Subject: [PATCH 4/6] Update buffer type --- .../llama_index/llms/google_genai/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py index 1635525b89..bd569b1829 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py @@ -1,7 +1,7 @@ import asyncio import logging from collections.abc import Sequence -from io import BufferedIOBase +from io import IOBase from typing import ( TYPE_CHECKING, Any, @@ -223,7 +223,7 @@ def chat_from_gemini_response( async def create_file_part( - file_buffer: BufferedIOBase, + file_buffer: IOBase, mime_type: str, file_mode: Literal["inline", "fileapi", "hybrid"], client: Optional[Client], From c33b5e6705929bda6970f22d3f4481a6be890074 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Mon, 27 Oct 2025 17:28:26 +0100 Subject: [PATCH 5/6] Update FileAPI handling --- .../llama_index/llms/google_genai/base.py | 67 ++++++++---------- .../llama_index/llms/google_genai/utils.py | 70 ++++++++++--------- .../tests/test_llms_google_genai.py | 10 +-- 3 files changed, 73 insertions(+), 74 deletions(-) diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py index 84a45e5b60..0c0a9de385 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py @@ -308,7 +308,7 @@ def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any): **kwargs.pop("generation_config", {}), } params = {**kwargs, "generation_config": generation_config} - next_msg, chat_kwargs = asyncio.run( + next_msg, chat_kwargs, file_api_names = asyncio.run( prepare_chat_params( self.model, messages, self.file_mode, self._client, **params ) @@ -318,10 +318,7 @@ def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any): next_msg.parts if isinstance(next_msg, types.Content) else next_msg ) - if self.file_mode in ("fileapi", "hybrid"): - asyncio.run( - delete_uploaded_files([*chat_kwargs["history"], next_msg], self._client) - ) + 
asyncio.run(delete_uploaded_files(file_api_names, self._client)) return chat_from_gemini_response(response) @@ -332,7 +329,7 @@ async def _achat(self, messages: Sequence[ChatMessage], **kwargs: Any): **kwargs.pop("generation_config", {}), } params = {**kwargs, "generation_config": generation_config} - next_msg, chat_kwargs = await prepare_chat_params( + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( self.model, messages, self.file_mode, self._client, **params ) chat = self._client.aio.chats.create(**chat_kwargs) @@ -340,10 +337,7 @@ async def _achat(self, messages: Sequence[ChatMessage], **kwargs: Any): next_msg.parts if isinstance(next_msg, types.Content) else next_msg ) - if self.file_mode in ("fileapi", "hybrid"): - await delete_uploaded_files( - [*chat_kwargs["history"], next_msg], self._client - ) + await delete_uploaded_files(file_api_names, self._client) return chat_from_gemini_response(response) @@ -365,7 +359,7 @@ def _stream_chat( **kwargs.pop("generation_config", {}), } params = {**kwargs, "generation_config": generation_config} - next_msg, chat_kwargs = asyncio.run( + next_msg, chat_kwargs, file_api_names = asyncio.run( prepare_chat_params( self.model, messages, self.file_mode, self._client, **params ) @@ -401,11 +395,7 @@ def gen() -> ChatResponseGen: yield llama_resp if self.file_mode in ("fileapi", "hybrid"): - asyncio.run( - delete_uploaded_files( - [*chat_kwargs["history"], next_msg], self._client - ) - ) + asyncio.run(delete_uploaded_files(file_api_names, self._client)) return gen() @@ -423,7 +413,7 @@ async def _astream_chat( **kwargs.pop("generation_config", {}), } params = {**kwargs, "generation_config": generation_config} - next_msg, chat_kwargs = await prepare_chat_params( + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( self.model, messages, self.file_mode, self._client, **params ) chat = self._client.aio.chats.create(**chat_kwargs) @@ -464,10 +454,7 @@ async def gen() -> ChatResponseAsyncGen: ) yield llama_resp - if self.file_mode in ("fileapi", "hybrid"): - await delete_uploaded_files( - [*chat_kwargs["history"], next_msg], self._client - ) + await delete_uploaded_files(file_api_names, self._client) return gen() @@ -586,10 +573,13 @@ def structured_predict_without_function_calling( llm_kwargs = llm_kwargs or {} messages = prompt.format_messages(**prompt_args) - contents = [ + contents_and_names = [ asyncio.run(chat_message_to_gemini(message, self.file_mode, self._client)) for message in messages ] + contents = [it[0] for it in contents_and_names] + file_api_names = [name for it in contents_and_names for name in it[1]] + response = self._client.models.generate_content( model=self.model, contents=contents, @@ -604,8 +594,7 @@ def structured_predict_without_function_calling( }, ) - if self.file_mode in ("fileapi", "hybrid"): - asyncio.run(delete_uploaded_files(contents, self._client)) + asyncio.run(delete_uploaded_files(file_api_names, self._client)) if isinstance(response.parsed, BaseModel): return response.parsed @@ -634,20 +623,22 @@ def structured_predict( generation_config["response_schema"] = output_cls messages = prompt.format_messages(**prompt_args) - contents = [ + contents_and_names = [ asyncio.run( chat_message_to_gemini(message, self.file_mode, self._client) ) for message in messages ] + contents = [it[0] for it in contents_and_names] + file_api_names = [name for it in contents_and_names for name in it[1]] + response = self._client.models.generate_content( model=self.model, contents=contents, 
config=generation_config, ) - if self.file_mode in ("fileapi", "hybrid"): - asyncio.run(delete_uploaded_files(contents, self._client)) + asyncio.run(delete_uploaded_files(file_api_names, self._client)) if isinstance(response.parsed, BaseModel): return response.parsed @@ -681,20 +672,22 @@ async def astructured_predict( generation_config["response_schema"] = output_cls messages = prompt.format_messages(**prompt_args) - contents = await asyncio.gather( + contents_and_names = await asyncio.gather( *[ chat_message_to_gemini(message, self.file_mode, self._client) for message in messages ] ) + contents = [it[0] for it in contents_and_names] + file_api_names = [name for it in contents_and_names for name in it[1]] + response = await self._client.aio.models.generate_content( model=self.model, contents=contents, config=generation_config, ) - if self.file_mode in ("fileapi", "hybrid"): - await delete_uploaded_files(contents, self._client) + await delete_uploaded_files(file_api_names, self._client) if isinstance(response.parsed, BaseModel): return response.parsed @@ -728,12 +721,14 @@ def stream_structured_predict( generation_config["response_schema"] = output_cls messages = prompt.format_messages(**prompt_args) - contents = [ + contents_and_names = [ asyncio.run( chat_message_to_gemini(message, self.file_mode, self._client) ) for message in messages ] + contents = [it[0] for it in contents_and_names] + file_api_names = [name for it in contents_and_names for name in it[1]] def gen() -> Generator[Union[Model, FlexibleModel], None, None]: flexible_model = create_flexible_model(output_cls) @@ -757,8 +752,7 @@ def gen() -> Generator[Union[Model, FlexibleModel], None, None]: if streaming_model: yield streaming_model - if self.file_mode in ("fileapi", "hybrid"): - asyncio.run(delete_uploaded_files(contents, self._client)) + asyncio.run(delete_uploaded_files(file_api_names, self._client)) return gen() else: @@ -788,12 +782,14 @@ async def astream_structured_predict( generation_config["response_schema"] = output_cls messages = prompt.format_messages(**prompt_args) - contents = await asyncio.gather( + contents_and_names = await asyncio.gather( *[ chat_message_to_gemini(message, self.file_mode, self._client) for message in messages ] ) + contents = [it[0] for it in contents_and_names] + file_api_names = [name for it in contents_and_names for name in it[1]] async def gen() -> AsyncGenerator[Union[Model, FlexibleModel], None]: flexible_model = create_flexible_model(output_cls) @@ -817,8 +813,7 @@ async def gen() -> AsyncGenerator[Union[Model, FlexibleModel], None]: if streaming_model: yield streaming_model - if self.file_mode in ("fileapi", "hybrid"): - await delete_uploaded_files(contents, self._client) + await delete_uploaded_files(file_api_names, self._client) return gen() else: diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py index bd569b1829..0854125615 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py @@ -227,7 +227,7 @@ async def create_file_part( mime_type: str, file_mode: Literal["inline", "fileapi", "hybrid"], client: Optional[Client], -) -> types.PartUnion: +) -> tuple[types.Part, Optional[str]]: """Create a Part or File object for the given file depending on its size.""" if 
file_mode in ("inline", "hybrid"): file_buffer.seek(0, 2) # Seek to end @@ -238,7 +238,7 @@ async def create_file_part( return types.Part.from_bytes( data=file_buffer.read(), mime_type=mime_type, - ) + ), None elif file_mode == "inline": raise ValueError("Files in inline mode must be smaller than 20MB.") @@ -257,19 +257,16 @@ async def create_file_part( if file.state.name == "FAILED": raise ValueError("Failed to upload the file with FileAPI") - return file + return types.Part.from_uri( + file_uri=file.uri, + mime_type=mime_type, + ), file.name -async def delete_uploaded_files( - contents: list[Union[types.Content, types.File]], client: Client -) -> None: +async def delete_uploaded_files(file_api_names: list[str], client: Client) -> None: """Delete files uploaded with File API.""" await asyncio.gather( - *[ - client.aio.files.delete(name=content.name) - for content in contents - if isinstance(content, types.File) - ] + *[client.aio.files.delete(name=name) for name in file_api_names] ) @@ -277,11 +274,14 @@ async def chat_message_to_gemini( message: ChatMessage, file_mode: Literal["inline", "fileapi", "hybrid"] = "hybrid", client: Optional[Client] = None, -) -> Union[types.Content, types.File]: +) -> tuple[types.Content, list[str]]: """Convert ChatMessages to Gemini-specific history, including ImageDocuments.""" parts = [] + file_api_names = [] part = None for index, block in enumerate(message.blocks): + file_api_name = None + if isinstance(block, TextBlock): if block.text: part = types.Part.from_text(text=block.text) @@ -294,10 +294,9 @@ async def chat_message_to_gemini( else "image/jpeg" # TODO: Fail? ) - part = await create_file_part(file_buffer, mime_type, file_mode, client) - - if isinstance(part, types.File): - return part # Return the file as it is a message content and not a part + part, file_api_name = await create_file_part( + file_buffer, mime_type, file_mode, client + ) elif isinstance(block, VideoBlock): file_buffer = block.resolve_video(as_base64=False) @@ -307,13 +306,10 @@ async def chat_message_to_gemini( else "video/mp4" # TODO: Fail? 
) - part = await create_file_part(file_buffer, mime_type, file_mode, client) - - if isinstance(part, types.File): - return part # Return the file as it is a message content and not a part - + part, file_api_name = await create_file_part( + file_buffer, mime_type, file_mode, client + ) part.video_metadata = types.VideoMetadata(fps=block.fps) - elif isinstance(block, DocumentBlock): file_buffer = block.resolve_document() @@ -323,10 +319,9 @@ async def chat_message_to_gemini( else "application/pdf" ) - part = await create_file_part(file_buffer, mime_type, file_mode, client) - - if isinstance(part, types.File): - return part # Return the file as it is a message content and not a part + part, file_api_name = await create_file_part( + file_buffer, mime_type, file_mode, client + ) elif isinstance(block, ThinkingBlock): if block.content: part = types.Part.from_text(text=block.content) @@ -337,6 +332,10 @@ async def chat_message_to_gemini( else: msg = f"Unsupported content block type: {type(block).__name__}" raise ValueError(msg) + + if file_api_name is not None: + file_api_names.append(file_api_name) + if part is not None: if message.role == MessageRole.MODEL: thought_signatures = message.additional_kwargs.get( @@ -372,12 +371,12 @@ async def chat_message_to_gemini( ) return types.Content( role=ROLES_TO_GEMINI[message.role], parts=[function_response_part] - ) + ), file_api_names return types.Content( role=ROLES_TO_GEMINI[message.role], parts=parts, - ) + ), file_api_names def convert_schema_to_function_declaration( @@ -414,16 +413,16 @@ class ChatParams(typing.TypedDict): async def prepare_chat_params( model: str, messages: Sequence[ChatMessage], - use_file_api: bool = False, + file_mode: Literal["inline", "fileapi", "hybrid"] = "hybrid", client: Optional[Client] = None, **kwargs: Any, -) -> tuple[Union[types.Content, types.File], ChatParams]: +) -> tuple[types.Content, ChatParams, list[str]]: """ Prepare common parameters for chat creation. Args: messages: Sequence of chat messages - use_file_api: Whether to use File API or not for large files. + file_mode: The mode for file uploading client: Google Genai client used for uploading large files. 
**kwargs: Additional keyword arguments @@ -431,6 +430,7 @@ async def prepare_chat_params( tuple containing: - next_msg: the next message to send - chat_kwargs: processed keyword arguments for chat creation + - file_api_names: list of file api names to delete after chat call """ # Extract system message if present @@ -442,12 +442,14 @@ async def prepare_chat_params( # Merge messages with the same role merged_messages = merge_neighboring_same_role_messages(messages) - initial_history = await asyncio.gather( + initial_history_and_names = await asyncio.gather( *[ - chat_message_to_gemini(message, use_file_api, client) + chat_message_to_gemini(message, file_mode, client) for message in merged_messages ] ) + initial_history = [it[0] for it in initial_history_and_names] + file_api_names = [name for it in initial_history_and_names for name in it[1]] # merge tool messages into a single tool message # while maintaining the tool names @@ -514,7 +516,7 @@ async def prepare_chat_params( chat_kwargs["config"] = types.GenerateContentConfig(**config) - return next_msg, chat_kwargs + return next_msg, chat_kwargs, file_api_names def handle_streaming_flexible_model( diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/tests/test_llms_google_genai.py b/llama-index-integrations/llms/llama-index-llms-google-genai/tests/test_llms_google_genai.py index 6be1799fc3..7f45d76874 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/tests/test_llms_google_genai.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/tests/test_llms_google_genai.py @@ -758,7 +758,7 @@ async def test_prepare_chat_params_more_than_2_tool_calls(): ChatMessage(content="Here is a list of puppies.", role=MessageRole.ASSISTANT), ] - next_msg, chat_kwargs = await prepare_chat_params( + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( expected_model_name, test_messages ) @@ -817,7 +817,9 @@ async def test_prepare_chat_params_with_system_message(): ] # Execute prepare_chat_params - next_msg, chat_kwargs = await prepare_chat_params(model_name, messages) + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( + model_name, messages + ) # Verify system_prompt is forwarded to system_instruction cfg = chat_kwargs["config"] @@ -1031,7 +1033,7 @@ async def test_cached_content_in_chat_params() -> None: messages = [ChatMessage(content="Test message", role=MessageRole.USER)] # Prepare chat params with the LLM's generation config - next_msg, chat_kwargs = await prepare_chat_params( + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( llm.model, messages, generation_config=llm._generation_config ) @@ -1199,7 +1201,7 @@ async def test_built_in_tool_in_chat_params() -> None: ) # Prepare chat params - next_msg, chat_kwargs = await prepare_chat_params( + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( llm.model, messages, generation_config=llm._generation_config ) From b4eaf8e6ff2af051fdb9d455372b9dac412cea60 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Thu, 6 Nov 2025 12:06:41 +0100 Subject: [PATCH 6/6] Fix lint --- .../llama_index/llms/google_genai/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py index 31c155cf8f..8098ccbbbc 100644 --- 
a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py @@ -14,7 +14,6 @@ Literal, cast, ) -from io import BytesIO import typing import google.genai.types as types
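
Usage note (not part of the patches above): the series replaces the boolean use_file_api flag with a three-way file_mode setting ("inline", "fileapi", "hybrid", defaulting to "hybrid") and tracks the names of files uploaded through the Gemini File API so they can be deleted after each call. The sketch below illustrates how the new option is exercised from user code. It is a minimal, hedged example: the model name, the local file path, and the exact import paths for the content blocks are assumptions based on recent llama-index-core, not something defined by this patch series.

    # Illustrative sketch only. Assumes GOOGLE_API_KEY is set in the environment
    # and that "report.pdf" is a hypothetical local file.
    from llama_index.core.llms import ChatMessage, MessageRole
    from llama_index.core.base.llms.types import TextBlock, DocumentBlock
    from llama_index.llms.google_genai import GoogleGenAI

    # "hybrid" (the default) inlines files below the 20MB Gemini inline limit and
    # falls back to the File API for larger ones. "inline" never uploads and raises
    # ValueError for files of 20MB or more; "fileapi" always uploads.
    llm = GoogleGenAI(model="gemini-2.0-flash", file_mode="hybrid")

    message = ChatMessage(
        role=MessageRole.USER,
        blocks=[
            TextBlock(text="Summarize the attached document."),
            DocumentBlock(path="report.pdf"),  # hypothetical file path
        ],
    )

    # Any files uploaded via the File API for this request are deleted again
    # after the call, using the file names returned by prepare_chat_params().
    response = llm.chat([message])
    print(response.message.content)

For integrators calling the utilities directly: after PATCH 5, prepare_chat_params() returns a three-tuple (next_msg, chat_kwargs, file_api_names), and the file_api_names list is what should be passed to delete_uploaded_files() once the request completes.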