From 40cf9317b19e4b83a1fff6b8799ea6bbb6d2b5e3 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Mon, 27 Oct 2025 10:42:48 +0100 Subject: [PATCH 1/6] Change use_file_api to file_mode --- .../llama_index/llms/google_genai/base.py | 49 +++++++++---------- .../llama_index/llms/google_genai/utils.py | 46 ++++++++++------- .../pyproject.toml | 2 +- .../llama-index-llms-google-genai/uv.lock | 4 +- 4 files changed, 54 insertions(+), 47 deletions(-) diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py index 69e5ad4055..84a45e5b60 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py @@ -16,6 +16,7 @@ Type, Union, Callable, + Literal, ) @@ -139,9 +140,9 @@ class GoogleGenAI(FunctionCallingLLM): default=None, description="Google GenAI tool to use for the model to augment responses.", ) - use_file_api: bool = Field( - default=True, - description="Whether or not to use the FileAPI for large files (>20MB).", + file_mode: Literal["inline", "fileapi", "hybrid"] = Field( + default="hybrid", + description="Whether to use inline-only, FileAPI-only or both for handling files.", ) _max_tokens: int = PrivateAttr() @@ -165,7 +166,7 @@ def __init__( is_function_calling_model: bool = True, cached_content: Optional[str] = None, built_in_tool: Optional[types.Tool] = None, - use_file_api: bool = True, + file_mode: Literal["inline", "fileapi", "hybrid"] = "hybrid", **kwargs: Any, ): # API keys are optional. The API can be authorised via OAuth (detected @@ -214,7 +215,7 @@ def __init__( max_retries=max_retries, cached_content=cached_content, built_in_tool=built_in_tool, - use_file_api=use_file_api, + file_mode=file_mode, **kwargs, ) @@ -309,7 +310,7 @@ def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any): params = {**kwargs, "generation_config": generation_config} next_msg, chat_kwargs = asyncio.run( prepare_chat_params( - self.model, messages, self.use_file_api, self._client, **params + self.model, messages, self.file_mode, self._client, **params ) ) chat = self._client.chats.create(**chat_kwargs) @@ -317,7 +318,7 @@ def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any): next_msg.parts if isinstance(next_msg, types.Content) else next_msg ) - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): asyncio.run( delete_uploaded_files([*chat_kwargs["history"], next_msg], self._client) ) @@ -332,14 +333,14 @@ async def _achat(self, messages: Sequence[ChatMessage], **kwargs: Any): } params = {**kwargs, "generation_config": generation_config} next_msg, chat_kwargs = await prepare_chat_params( - self.model, messages, self.use_file_api, self._client, **params + self.model, messages, self.file_mode, self._client, **params ) chat = self._client.aio.chats.create(**chat_kwargs) response = await chat.send_message( next_msg.parts if isinstance(next_msg, types.Content) else next_msg ) - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): await delete_uploaded_files( [*chat_kwargs["history"], next_msg], self._client ) @@ -366,7 +367,7 @@ def _stream_chat( params = {**kwargs, "generation_config": generation_config} next_msg, chat_kwargs = asyncio.run( prepare_chat_params( - self.model, messages, self.use_file_api, self._client, **params + self.model, messages, 
self.file_mode, self._client, **params ) ) chat = self._client.chats.create(**chat_kwargs) @@ -399,7 +400,7 @@ def gen() -> ChatResponseGen: llama_resp.message.additional_kwargs["tool_calls"] = existing_tool_calls yield llama_resp - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): asyncio.run( delete_uploaded_files( [*chat_kwargs["history"], next_msg], self._client @@ -423,7 +424,7 @@ async def _astream_chat( } params = {**kwargs, "generation_config": generation_config} next_msg, chat_kwargs = await prepare_chat_params( - self.model, messages, self.use_file_api, self._client, **params + self.model, messages, self.file_mode, self._client, **params ) chat = self._client.aio.chats.create(**chat_kwargs) @@ -463,7 +464,7 @@ async def gen() -> ChatResponseAsyncGen: ) yield llama_resp - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): await delete_uploaded_files( [*chat_kwargs["history"], next_msg], self._client ) @@ -586,9 +587,7 @@ def structured_predict_without_function_calling( messages = prompt.format_messages(**prompt_args) contents = [ - asyncio.run( - chat_message_to_gemini(message, self.use_file_api, self._client) - ) + asyncio.run(chat_message_to_gemini(message, self.file_mode, self._client)) for message in messages ] response = self._client.models.generate_content( @@ -605,7 +604,7 @@ def structured_predict_without_function_calling( }, ) - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): asyncio.run(delete_uploaded_files(contents, self._client)) if isinstance(response.parsed, BaseModel): @@ -637,7 +636,7 @@ def structured_predict( messages = prompt.format_messages(**prompt_args) contents = [ asyncio.run( - chat_message_to_gemini(message, self.use_file_api, self._client) + chat_message_to_gemini(message, self.file_mode, self._client) ) for message in messages ] @@ -647,7 +646,7 @@ def structured_predict( config=generation_config, ) - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): asyncio.run(delete_uploaded_files(contents, self._client)) if isinstance(response.parsed, BaseModel): @@ -684,7 +683,7 @@ async def astructured_predict( messages = prompt.format_messages(**prompt_args) contents = await asyncio.gather( *[ - chat_message_to_gemini(message, self.use_file_api, self._client) + chat_message_to_gemini(message, self.file_mode, self._client) for message in messages ] ) @@ -694,7 +693,7 @@ async def astructured_predict( config=generation_config, ) - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): await delete_uploaded_files(contents, self._client) if isinstance(response.parsed, BaseModel): @@ -731,7 +730,7 @@ def stream_structured_predict( messages = prompt.format_messages(**prompt_args) contents = [ asyncio.run( - chat_message_to_gemini(message, self.use_file_api, self._client) + chat_message_to_gemini(message, self.file_mode, self._client) ) for message in messages ] @@ -758,7 +757,7 @@ def gen() -> Generator[Union[Model, FlexibleModel], None, None]: if streaming_model: yield streaming_model - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): asyncio.run(delete_uploaded_files(contents, self._client)) return gen() @@ -791,7 +790,7 @@ async def astream_structured_predict( messages = prompt.format_messages(**prompt_args) contents = await asyncio.gather( *[ - chat_message_to_gemini(message, self.use_file_api, self._client) + chat_message_to_gemini(message, self.file_mode, self._client) for message in messages ] ) @@ -818,7 +817,7 @@ async def gen() -> 
AsyncGenerator[Union[Model, FlexibleModel], None]: if streaming_model: yield streaming_model - if self.use_file_api: + if self.file_mode in ("fileapi", "hybrid"): await delete_uploaded_files(contents, self._client) return gen() diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py index fe02a5a69b..6f32d77300 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py @@ -10,6 +10,7 @@ Optional, Type, Tuple, + Literal, ) import typing @@ -222,25 +223,30 @@ def chat_from_gemini_response( async def create_file_part( - file_bytes: bytes, mime_type: str, use_file_api: bool, client: Optional[Client] + file_buffer: BytesIO, + mime_type: str, + file_mode: Literal["inline", "fileapi", "hybrid"], + client: Optional[Client], ) -> types.PartUnion: """Create a Part or File object for the given file depending on its size.""" - if ( - not use_file_api - or len(file_bytes) - < 20 * 1024 * 1024 # 20MB is the Gemini inline data size limit - ): - return types.Part.from_bytes( - data=file_bytes, - mime_type=mime_type, - ) + if file_mode in ("inline", "hybrid"): + file_buffer.seek(0, 2) # Seek to end + size = file_buffer.tell() # Get file size + file_buffer.seek(0) # Reset to beginning + + if size < 20 * 1024 * 1024: # 20MB is the Gemini inline data size limit + return types.Part.from_bytes( + data=file_buffer.read(), + mime_type=mime_type, + ) + elif file_mode == "inline": + raise ValueError("Files in inline mode must be smaller than 20MB.") if client is None: raise ValueError("A Google GenAI client must be provided for use with FileAPI.") - buffer = BytesIO(file_bytes) file = await client.aio.files.upload( - file=buffer, config=types.UploadFileConfig(mime_type=mime_type) + file=file_buffer, config=types.UploadFileConfig(mime_type=mime_type) ) # Wait for file processing @@ -268,7 +274,9 @@ async def delete_uploaded_files( async def chat_message_to_gemini( - message: ChatMessage, use_file_api: bool = False, client: Optional[Client] = None + message: ChatMessage, + file_mode: Literal["inline", "fileapi", "hybrid"] = "hybrid", + client: Optional[Client] = None, ) -> Union[types.Content, types.File]: """Convert ChatMessages to Gemini-specific history, including ImageDocuments.""" parts = [] @@ -278,7 +286,7 @@ async def chat_message_to_gemini( if block.text: part = types.Part.from_text(text=block.text) elif isinstance(block, ImageBlock): - file_bytes = block.resolve_image(as_base64=False).read() + file_buffer = block.resolve_image(as_base64=False) mime_type = ( block.image_mimetype @@ -286,13 +294,12 @@ async def chat_message_to_gemini( else "image/jpeg" # TODO: Fail? ) - part = await create_file_part(file_bytes, mime_type, use_file_api, client) + part = await create_file_part(file_buffer, mime_type, file_mode, client) if isinstance(part, types.File): return part # Return the file as it is a message content and not a part elif isinstance(block, VideoBlock): file_buffer = block.resolve_video(as_base64=False) - file_bytes = file_buffer.read() mime_type = ( block.video_mimetype @@ -300,7 +307,7 @@ async def chat_message_to_gemini( else "video/mp4" # TODO: Fail? 
) - part = await create_file_part(file_bytes, mime_type, use_file_api, client) + part = await create_file_part(file_buffer, mime_type, file_mode, client) if isinstance(part, types.File): return part # Return the file as it is a message content and not a part @@ -309,13 +316,14 @@ async def chat_message_to_gemini( elif isinstance(block, DocumentBlock): file_buffer = block.resolve_document() - file_bytes = file_buffer.read() + mime_type = ( block.document_mimetype if block.document_mimetype is not None else "application/pdf" ) - part = await create_file_part(file_bytes, mime_type, use_file_api, client) + + part = await create_file_part(file_buffer, mime_type, file_mode, client) if isinstance(part, types.File): return part # Return the file as it is a message content and not a part diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-google-genai/pyproject.toml index 181ceda88c..5c333fa164 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/pyproject.toml @@ -27,7 +27,7 @@ dev = [ [project] name = "llama-index-llms-google-genai" -version = "0.6.2" +version = "0.7.0" description = "llama-index llms google genai integration" authors = [{name = "Your Name", email = "you@example.com"}] requires-python = ">=3.9,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/uv.lock b/llama-index-integrations/llms/llama-index-llms-google-genai/uv.lock index f236f5cb34..6c94ce251b 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/uv.lock +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.9, <4.0" resolution-markers = [ "python_full_version >= '3.11'", @@ -1706,7 +1706,7 @@ wheels = [ [[package]] name = "llama-index-llms-google-genai" -version = "0.6.2" +version = "0.7.0" source = { editable = "." 
} dependencies = [ { name = "google-genai" }, From 597111b2e769fbe458f640e4d0ea928d653fc8f7 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Mon, 27 Oct 2025 10:59:23 +0100 Subject: [PATCH 2/6] Fix lint --- CHANGELOG.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index af76f376c0..3335d5bf0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,52 +5,67 @@ ## [2025-10-26] ### llama-index-core [0.14.6] + - Add allow_parallel_tool_calls for non-streaming ([#20117](https://github.com/run-llama/llama_index/pull/20117)) - Fix invalid use of field-specific metadata ([#20122](https://github.com/run-llama/llama_index/pull/20122)) - update doc for SemanticSplitterNodeParser ([#20125](https://github.com/run-llama/llama_index/pull/20125)) - fix rare cases when sentence splits are larger than chunk size ([#20147](https://github.com/run-llama/llama_index/pull/20147)) ### llama-index-embeddings-bedrock [0.7.0] + - Fix BedrockEmbedding to support Cohere v4 response format ([#20094](https://github.com/run-llama/llama_index/pull/20094)) ### llama-index-embeddings-isaacus [0.1.0] + - feat: Isaacus embeddings integration ([#20124](https://github.com/run-llama/llama_index/pull/20124)) ### llama-index-embeddings-oci-genai [0.4.2] + - Update OCI GenAI cohere models ([#20146](https://github.com/run-llama/llama_index/pull/20146)) ### llama-index-llms-anthropic [0.9.7] + - Fix double token stream in anthropic llm ([#20108](https://github.com/run-llama/llama_index/pull/20108)) - Ensure anthropic content delta only has user facing response ([#20113](https://github.com/run-llama/llama_index/pull/20113)) ### llama-index-llms-baseten [0.1.7] + - add GLM ([#20121](https://github.com/run-llama/llama_index/pull/20121)) ### llama-index-llms-helicone [0.1.0] + - integrate helicone to llama-index ([#20131](https://github.com/run-llama/llama_index/pull/20131)) ### llama-index-llms-oci-genai [0.6.4] + - Update OCI GenAI cohere models ([#20146](https://github.com/run-llama/llama_index/pull/20146)) ### llama-index-llms-openai [0.6.5] + - chore: openai vbump ([#20095](https://github.com/run-llama/llama_index/pull/20095)) ### llama-index-readers-imdb-review [0.4.2] + - chore: Update selenium dependency in imdb-review reader ([#20105](https://github.com/run-llama/llama_index/pull/20105)) ### llama-index-retrievers-bedrock [0.5.0] + - feat(bedrock): add async support for AmazonKnowledgeBasesRetriever ([#20114](https://github.com/run-llama/llama_index/pull/20114)) ### llama-index-retrievers-superlinked [0.1.3] + - Update README.md ([#19829](https://github.com/run-llama/llama_index/pull/19829)) ### llama-index-storage-kvstore-postgres [0.4.2] + - fix: Replace raw SQL string interpolation with proper SQLAlchemy parameterized APIs in PostgresKVStore ([#20104](https://github.com/run-llama/llama_index/pull/20104)) ### llama-index-tools-mcp [0.4.3] + - Fix BasicMCPClient resource signatures ([#20118](https://github.com/run-llama/llama_index/pull/20118)) ### llama-index-vector-stores-postgres [0.7.1] + - Add GIN index support for text array metadata in PostgreSQL vector store ([#20130](https://github.com/run-llama/llama_index/pull/20130)) ## [2025-10-15] From 41b9ec4be6d5c7ee41a0d8394495e4add0037619 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Mon, 27 Oct 2025 14:35:12 +0100 Subject: [PATCH 3/6] Update file_buffer type --- .../llama_index/llms/google_genai/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py index 6f32d77300..1635525b89 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py @@ -1,7 +1,7 @@ import asyncio import logging from collections.abc import Sequence -from io import BytesIO +from io import BufferedIOBase from typing import ( TYPE_CHECKING, Any, @@ -223,7 +223,7 @@ def chat_from_gemini_response( async def create_file_part( - file_buffer: BytesIO, + file_buffer: BufferedIOBase, mime_type: str, file_mode: Literal["inline", "fileapi", "hybrid"], client: Optional[Client], From d733d203cc6d2e4c166cea4eeee9e9c0bf8999f3 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Mon, 27 Oct 2025 15:29:52 +0100 Subject: [PATCH 4/6] Update buffer type --- .../llama_index/llms/google_genai/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py index 1635525b89..bd569b1829 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py @@ -1,7 +1,7 @@ import asyncio import logging from collections.abc import Sequence -from io import BufferedIOBase +from io import IOBase from typing import ( TYPE_CHECKING, Any, @@ -223,7 +223,7 @@ def chat_from_gemini_response( async def create_file_part( - file_buffer: BufferedIOBase, + file_buffer: IOBase, mime_type: str, file_mode: Literal["inline", "fileapi", "hybrid"], client: Optional[Client], From c33b5e6705929bda6970f22d3f4481a6be890074 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Mon, 27 Oct 2025 17:28:26 +0100 Subject: [PATCH 5/6] Update FileAPI handling --- .../llama_index/llms/google_genai/base.py | 67 ++++++++---------- .../llama_index/llms/google_genai/utils.py | 70 ++++++++++--------- .../tests/test_llms_google_genai.py | 10 +-- 3 files changed, 73 insertions(+), 74 deletions(-) diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py index 84a45e5b60..0c0a9de385 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/base.py @@ -308,7 +308,7 @@ def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any): **kwargs.pop("generation_config", {}), } params = {**kwargs, "generation_config": generation_config} - next_msg, chat_kwargs = asyncio.run( + next_msg, chat_kwargs, file_api_names = asyncio.run( prepare_chat_params( self.model, messages, self.file_mode, self._client, **params ) @@ -318,10 +318,7 @@ def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any): next_msg.parts if isinstance(next_msg, types.Content) else next_msg ) - if self.file_mode in ("fileapi", "hybrid"): - asyncio.run( - delete_uploaded_files([*chat_kwargs["history"], next_msg], self._client) - ) + 
asyncio.run(delete_uploaded_files(file_api_names, self._client)) return chat_from_gemini_response(response) @@ -332,7 +329,7 @@ async def _achat(self, messages: Sequence[ChatMessage], **kwargs: Any): **kwargs.pop("generation_config", {}), } params = {**kwargs, "generation_config": generation_config} - next_msg, chat_kwargs = await prepare_chat_params( + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( self.model, messages, self.file_mode, self._client, **params ) chat = self._client.aio.chats.create(**chat_kwargs) @@ -340,10 +337,7 @@ async def _achat(self, messages: Sequence[ChatMessage], **kwargs: Any): next_msg.parts if isinstance(next_msg, types.Content) else next_msg ) - if self.file_mode in ("fileapi", "hybrid"): - await delete_uploaded_files( - [*chat_kwargs["history"], next_msg], self._client - ) + await delete_uploaded_files(file_api_names, self._client) return chat_from_gemini_response(response) @@ -365,7 +359,7 @@ def _stream_chat( **kwargs.pop("generation_config", {}), } params = {**kwargs, "generation_config": generation_config} - next_msg, chat_kwargs = asyncio.run( + next_msg, chat_kwargs, file_api_names = asyncio.run( prepare_chat_params( self.model, messages, self.file_mode, self._client, **params ) @@ -401,11 +395,7 @@ def gen() -> ChatResponseGen: yield llama_resp if self.file_mode in ("fileapi", "hybrid"): - asyncio.run( - delete_uploaded_files( - [*chat_kwargs["history"], next_msg], self._client - ) - ) + asyncio.run(delete_uploaded_files(file_api_names, self._client)) return gen() @@ -423,7 +413,7 @@ async def _astream_chat( **kwargs.pop("generation_config", {}), } params = {**kwargs, "generation_config": generation_config} - next_msg, chat_kwargs = await prepare_chat_params( + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( self.model, messages, self.file_mode, self._client, **params ) chat = self._client.aio.chats.create(**chat_kwargs) @@ -464,10 +454,7 @@ async def gen() -> ChatResponseAsyncGen: ) yield llama_resp - if self.file_mode in ("fileapi", "hybrid"): - await delete_uploaded_files( - [*chat_kwargs["history"], next_msg], self._client - ) + await delete_uploaded_files(file_api_names, self._client) return gen() @@ -586,10 +573,13 @@ def structured_predict_without_function_calling( llm_kwargs = llm_kwargs or {} messages = prompt.format_messages(**prompt_args) - contents = [ + contents_and_names = [ asyncio.run(chat_message_to_gemini(message, self.file_mode, self._client)) for message in messages ] + contents = [it[0] for it in contents_and_names] + file_api_names = [name for it in contents_and_names for name in it[1]] + response = self._client.models.generate_content( model=self.model, contents=contents, @@ -604,8 +594,7 @@ def structured_predict_without_function_calling( }, ) - if self.file_mode in ("fileapi", "hybrid"): - asyncio.run(delete_uploaded_files(contents, self._client)) + asyncio.run(delete_uploaded_files(file_api_names, self._client)) if isinstance(response.parsed, BaseModel): return response.parsed @@ -634,20 +623,22 @@ def structured_predict( generation_config["response_schema"] = output_cls messages = prompt.format_messages(**prompt_args) - contents = [ + contents_and_names = [ asyncio.run( chat_message_to_gemini(message, self.file_mode, self._client) ) for message in messages ] + contents = [it[0] for it in contents_and_names] + file_api_names = [name for it in contents_and_names for name in it[1]] + response = self._client.models.generate_content( model=self.model, contents=contents, 
config=generation_config, ) - if self.file_mode in ("fileapi", "hybrid"): - asyncio.run(delete_uploaded_files(contents, self._client)) + asyncio.run(delete_uploaded_files(file_api_names, self._client)) if isinstance(response.parsed, BaseModel): return response.parsed @@ -681,20 +672,22 @@ async def astructured_predict( generation_config["response_schema"] = output_cls messages = prompt.format_messages(**prompt_args) - contents = await asyncio.gather( + contents_and_names = await asyncio.gather( *[ chat_message_to_gemini(message, self.file_mode, self._client) for message in messages ] ) + contents = [it[0] for it in contents_and_names] + file_api_names = [name for it in contents_and_names for name in it[1]] + response = await self._client.aio.models.generate_content( model=self.model, contents=contents, config=generation_config, ) - if self.file_mode in ("fileapi", "hybrid"): - await delete_uploaded_files(contents, self._client) + await delete_uploaded_files(file_api_names, self._client) if isinstance(response.parsed, BaseModel): return response.parsed @@ -728,12 +721,14 @@ def stream_structured_predict( generation_config["response_schema"] = output_cls messages = prompt.format_messages(**prompt_args) - contents = [ + contents_and_names = [ asyncio.run( chat_message_to_gemini(message, self.file_mode, self._client) ) for message in messages ] + contents = [it[0] for it in contents_and_names] + file_api_names = [name for it in contents_and_names for name in it[1]] def gen() -> Generator[Union[Model, FlexibleModel], None, None]: flexible_model = create_flexible_model(output_cls) @@ -757,8 +752,7 @@ def gen() -> Generator[Union[Model, FlexibleModel], None, None]: if streaming_model: yield streaming_model - if self.file_mode in ("fileapi", "hybrid"): - asyncio.run(delete_uploaded_files(contents, self._client)) + asyncio.run(delete_uploaded_files(file_api_names, self._client)) return gen() else: @@ -788,12 +782,14 @@ async def astream_structured_predict( generation_config["response_schema"] = output_cls messages = prompt.format_messages(**prompt_args) - contents = await asyncio.gather( + contents_and_names = await asyncio.gather( *[ chat_message_to_gemini(message, self.file_mode, self._client) for message in messages ] ) + contents = [it[0] for it in contents_and_names] + file_api_names = [name for it in contents_and_names for name in it[1]] async def gen() -> AsyncGenerator[Union[Model, FlexibleModel], None]: flexible_model = create_flexible_model(output_cls) @@ -817,8 +813,7 @@ async def gen() -> AsyncGenerator[Union[Model, FlexibleModel], None]: if streaming_model: yield streaming_model - if self.file_mode in ("fileapi", "hybrid"): - await delete_uploaded_files(contents, self._client) + await delete_uploaded_files(file_api_names, self._client) return gen() else: diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py index bd569b1829..0854125615 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py @@ -227,7 +227,7 @@ async def create_file_part( mime_type: str, file_mode: Literal["inline", "fileapi", "hybrid"], client: Optional[Client], -) -> types.PartUnion: +) -> tuple[types.Part, Optional[str]]: """Create a Part or File object for the given file depending on its size.""" if 
file_mode in ("inline", "hybrid"): file_buffer.seek(0, 2) # Seek to end @@ -238,7 +238,7 @@ async def create_file_part( return types.Part.from_bytes( data=file_buffer.read(), mime_type=mime_type, - ) + ), None elif file_mode == "inline": raise ValueError("Files in inline mode must be smaller than 20MB.") @@ -257,19 +257,16 @@ async def create_file_part( if file.state.name == "FAILED": raise ValueError("Failed to upload the file with FileAPI") - return file + return types.Part.from_uri( + file_uri=file.uri, + mime_type=mime_type, + ), file.name -async def delete_uploaded_files( - contents: list[Union[types.Content, types.File]], client: Client -) -> None: +async def delete_uploaded_files(file_api_names: list[str], client: Client) -> None: """Delete files uploaded with File API.""" await asyncio.gather( - *[ - client.aio.files.delete(name=content.name) - for content in contents - if isinstance(content, types.File) - ] + *[client.aio.files.delete(name=name) for name in file_api_names] ) @@ -277,11 +274,14 @@ async def chat_message_to_gemini( message: ChatMessage, file_mode: Literal["inline", "fileapi", "hybrid"] = "hybrid", client: Optional[Client] = None, -) -> Union[types.Content, types.File]: +) -> tuple[types.Content, list[str]]: """Convert ChatMessages to Gemini-specific history, including ImageDocuments.""" parts = [] + file_api_names = [] part = None for index, block in enumerate(message.blocks): + file_api_name = None + if isinstance(block, TextBlock): if block.text: part = types.Part.from_text(text=block.text) @@ -294,10 +294,9 @@ async def chat_message_to_gemini( else "image/jpeg" # TODO: Fail? ) - part = await create_file_part(file_buffer, mime_type, file_mode, client) - - if isinstance(part, types.File): - return part # Return the file as it is a message content and not a part + part, file_api_name = await create_file_part( + file_buffer, mime_type, file_mode, client + ) elif isinstance(block, VideoBlock): file_buffer = block.resolve_video(as_base64=False) @@ -307,13 +306,10 @@ async def chat_message_to_gemini( else "video/mp4" # TODO: Fail? 
) - part = await create_file_part(file_buffer, mime_type, file_mode, client) - - if isinstance(part, types.File): - return part # Return the file as it is a message content and not a part - + part, file_api_name = await create_file_part( + file_buffer, mime_type, file_mode, client + ) part.video_metadata = types.VideoMetadata(fps=block.fps) - elif isinstance(block, DocumentBlock): file_buffer = block.resolve_document() @@ -323,10 +319,9 @@ async def chat_message_to_gemini( else "application/pdf" ) - part = await create_file_part(file_buffer, mime_type, file_mode, client) - - if isinstance(part, types.File): - return part # Return the file as it is a message content and not a part + part, file_api_name = await create_file_part( + file_buffer, mime_type, file_mode, client + ) elif isinstance(block, ThinkingBlock): if block.content: part = types.Part.from_text(text=block.content) @@ -337,6 +332,10 @@ async def chat_message_to_gemini( else: msg = f"Unsupported content block type: {type(block).__name__}" raise ValueError(msg) + + if file_api_name is not None: + file_api_names.append(file_api_name) + if part is not None: if message.role == MessageRole.MODEL: thought_signatures = message.additional_kwargs.get( @@ -372,12 +371,12 @@ async def chat_message_to_gemini( ) return types.Content( role=ROLES_TO_GEMINI[message.role], parts=[function_response_part] - ) + ), file_api_names return types.Content( role=ROLES_TO_GEMINI[message.role], parts=parts, - ) + ), file_api_names def convert_schema_to_function_declaration( @@ -414,16 +413,16 @@ class ChatParams(typing.TypedDict): async def prepare_chat_params( model: str, messages: Sequence[ChatMessage], - use_file_api: bool = False, + file_mode: Literal["inline", "fileapi", "hybrid"] = "hybrid", client: Optional[Client] = None, **kwargs: Any, -) -> tuple[Union[types.Content, types.File], ChatParams]: +) -> tuple[types.Content, ChatParams, list[str]]: """ Prepare common parameters for chat creation. Args: messages: Sequence of chat messages - use_file_api: Whether to use File API or not for large files. + file_mode: The mode for file uploading client: Google Genai client used for uploading large files. 
**kwargs: Additional keyword arguments @@ -431,6 +430,7 @@ async def prepare_chat_params( tuple containing: - next_msg: the next message to send - chat_kwargs: processed keyword arguments for chat creation + - file_api_names: list of file api names to delete after chat call """ # Extract system message if present @@ -442,12 +442,14 @@ async def prepare_chat_params( # Merge messages with the same role merged_messages = merge_neighboring_same_role_messages(messages) - initial_history = await asyncio.gather( + initial_history_and_names = await asyncio.gather( *[ - chat_message_to_gemini(message, use_file_api, client) + chat_message_to_gemini(message, file_mode, client) for message in merged_messages ] ) + initial_history = [it[0] for it in initial_history_and_names] + file_api_names = [name for it in initial_history_and_names for name in it[1]] # merge tool messages into a single tool message # while maintaining the tool names @@ -514,7 +516,7 @@ async def prepare_chat_params( chat_kwargs["config"] = types.GenerateContentConfig(**config) - return next_msg, chat_kwargs + return next_msg, chat_kwargs, file_api_names def handle_streaming_flexible_model( diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/tests/test_llms_google_genai.py b/llama-index-integrations/llms/llama-index-llms-google-genai/tests/test_llms_google_genai.py index 6be1799fc3..7f45d76874 100644 --- a/llama-index-integrations/llms/llama-index-llms-google-genai/tests/test_llms_google_genai.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/tests/test_llms_google_genai.py @@ -758,7 +758,7 @@ async def test_prepare_chat_params_more_than_2_tool_calls(): ChatMessage(content="Here is a list of puppies.", role=MessageRole.ASSISTANT), ] - next_msg, chat_kwargs = await prepare_chat_params( + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( expected_model_name, test_messages ) @@ -817,7 +817,9 @@ async def test_prepare_chat_params_with_system_message(): ] # Execute prepare_chat_params - next_msg, chat_kwargs = await prepare_chat_params(model_name, messages) + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( + model_name, messages + ) # Verify system_prompt is forwarded to system_instruction cfg = chat_kwargs["config"] @@ -1031,7 +1033,7 @@ async def test_cached_content_in_chat_params() -> None: messages = [ChatMessage(content="Test message", role=MessageRole.USER)] # Prepare chat params with the LLM's generation config - next_msg, chat_kwargs = await prepare_chat_params( + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( llm.model, messages, generation_config=llm._generation_config ) @@ -1199,7 +1201,7 @@ async def test_built_in_tool_in_chat_params() -> None: ) # Prepare chat params - next_msg, chat_kwargs = await prepare_chat_params( + next_msg, chat_kwargs, file_api_names = await prepare_chat_params( llm.model, messages, generation_config=llm._generation_config ) From b4eaf8e6ff2af051fdb9d455372b9dac412cea60 Mon Sep 17 00:00:00 2001 From: Florian-BACHO Date: Thu, 6 Nov 2025 12:06:41 +0100 Subject: [PATCH 6/6] Fix lint --- .../llama_index/llms/google_genai/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py index 31c155cf8f..8098ccbbbc 100644 --- 
a/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py +++ b/llama-index-integrations/llms/llama-index-llms-google-genai/llama_index/llms/google_genai/utils.py @@ -14,7 +14,6 @@ Literal, cast, ) -from io import BytesIO import typing import google.genai.types as types
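
Usage note (not part of the patches above): the series replaces the boolean use_file_api flag with a three-way file_mode setting ("inline", "fileapi", "hybrid", defaulting to "hybrid") and tracks the names of files uploaded through the Gemini File API so they can be deleted after each call. The sketch below illustrates how the new option is exercised from user code. It is a minimal, hedged example: the model name, the local file path, and the exact import paths for the content blocks are assumptions based on recent llama-index-core, not something defined by this patch series.

    # Illustrative sketch only. Assumes GOOGLE_API_KEY is set in the environment
    # and that "report.pdf" is a hypothetical local file.
    from llama_index.core.llms import ChatMessage, MessageRole
    from llama_index.core.base.llms.types import TextBlock, DocumentBlock
    from llama_index.llms.google_genai import GoogleGenAI

    # "hybrid" (the default) inlines files below the 20MB Gemini inline limit and
    # falls back to the File API for larger ones. "inline" never uploads and raises
    # ValueError for files of 20MB or more; "fileapi" always uploads.
    llm = GoogleGenAI(model="gemini-2.0-flash", file_mode="hybrid")

    message = ChatMessage(
        role=MessageRole.USER,
        blocks=[
            TextBlock(text="Summarize the attached document."),
            DocumentBlock(path="report.pdf"),  # hypothetical file path
        ],
    )

    # Any files uploaded via the File API for this request are deleted again
    # after the call, using the file names returned by prepare_chat_params().
    response = llm.chat([message])
    print(response.message.content)

For integrators calling the utilities directly: after PATCH 5, prepare_chat_params() returns a three-tuple (next_msg, chat_kwargs, file_api_names), and the file_api_names list is what should be passed to delete_uploaded_files() once the request completes.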