From cf21ca358478919207049695ba6b31dc6e0b2673 Mon Sep 17 00:00:00 2001
From: AlexeyChernenkoPlato <alexey@platoprotocol.com>
Date: Sat, 22 Nov 2025 09:33:36 -0800
Subject: [PATCH] fix: double function response processing issue

Merge https://github.com/google/adk-python/pull/2588

## Description
Fixes an issue in `base_llm_flow.py` where, in Bidi-streaming (live) mode, a multi-agent structure produces duplicated responses after a tool call.

## Problem
In Bidi-streaming (live) mode, when utilizing a multi-agent structure, the leaf-level sub-agent and its parent agent both process the same function call response, leading to duplicate replies. This duplication occurs because the parent agent's live connection remains open while a new connection with the child agent is initiated.

## Root Cause
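The issue originated from the placement of agent transfer logic in the `_postprocess_live` method at lines 547-557 (the line numbers in this description appear to refer to an earlier revision of the file and do not match the hunk offsets in the diff below). When a `transfer_to_agent` function call was made: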

1. The function response was processed in `_postprocess_live`
2. A recursive call to `agent_to_run.run_live` was initiated
3. This prevented the closure of the parent agent's connection at line 175 of the `run_live` method, as that code path was never reached
4. Both the parent and child agents remained active, causing both to process subsequent function responses

## Solution
This PR addresses the issue by ensuring the parent agent's live connection is closed before initiating a new one with the child agent. Changes made:

**Connection Management**: Moved the agent transfer logic from `_postprocess_live` method to the `run_live` method, specifically:
- Removed agent transfer handling from lines 547-557 in `_postprocess_live`
- Added agent transfer handling after connection closure at lines 176-184 in `run_live`

**Code Refactoring**: The agent transfer now occurs in the proper sequence:
1. Parent agent processes the `transfer_to_agent` function response
2. Parent agent's live connection is properly closed (line 175)
3. New connection with child agent is initiated (line 182)
4. Child agent handles subsequent function calls without duplication

**Improved Flow Control**: This ensures that each agent processes function call responses without duplication, maintaining proper connection lifecycle management in multi-agent structures.

## Testing
To verify this fix works correctly:

1. **Multi-Agent Structure Test**: Set up a multi-agent structure with a parent agent that transfers to a child agent via a `transfer_to_agent` function call
2. **Bidi-Streaming Mode**: Enable Bidi-streaming (live) mode in the configuration
3. **Function Call Verification**: Trigger a function call that results in agent transfer
4. **Response Monitoring**: Verify that only one response is generated (not duplicated)
5. **Connection Management**: Confirm that the parent agent's connection is properly closed before the child agent starts

**Expected Behavior**:
- Single function response per call
- Clean agent handoffs without connection leaks
- Proper connection lifecycle management

## Backward Compatibility
This change is **fully backward compatible**:
- No changes to public APIs or method signatures
- Existing single-agent flows remain unaffected
- Non-live (regular async) flows continue to work as before
- Only affects the internal flow control in live multi-agent scenarios

Co-authored-by: Hangfei Lin <hangfei@google.com>
COPYBARA_INTEGRATE_REVIEW=https://github.com/google/adk-python/pull/2588 from AlexeyChernenkoPlato:fix/double-function-response-processing-issue 3339260a4e007251137d199bdcef0ddef4487b03
PiperOrigin-RevId: 835619170
---
 .../adk/flows/llm_flows/base_llm_flow.py      | 35 +++++++++++++------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/src/google/adk/flows/llm_flows/base_llm_flow.py b/src/google/adk/flows/llm_flows/base_llm_flow.py
index db50e77809..824cd26be1 100644
--- a/src/google/adk/flows/llm_flows/base_llm_flow.py
+++ b/src/google/adk/flows/llm_flows/base_llm_flow.py
@@ -156,7 +156,7 @@ async def run_live(
                   break
                 logger.debug('Receive new event: %s', event)
                 yield event
-                # send back the function response
+                # send back the function response to models
                 if event.get_function_responses():
                   logger.debug(
                       'Sending back last function response event: %s', event
@@ -164,6 +164,16 @@ async def run_live(
                   invocation_context.live_request_queue.send_content(
                       event.content
                   )
+                # We handle agent transfer here in `run_live` rather than
+                # in `_postprocess_live` to prevent duplication of function
+                # response processing. If agent transfer were handled in
+                # `_postprocess_live`, events yielded from child agent's
+                # `run_live` would bubble up to parent agent's `run_live`,
+                # causing `event.get_function_responses()` to be true in both
+                # child and parent, and `send_content()` to be called twice for
+                # the same function response. By handling agent transfer here,
+                # we ensure that only child agent processes its own function
+                # responses after the transfer.
                 if (
                     event.content
                     and event.content.parts
@@ -174,7 +184,21 @@ async def run_live(
                   await asyncio.sleep(DEFAULT_TRANSFER_AGENT_DELAY)
                   # cancel the tasks that belongs to the closed connection.
                   send_task.cancel()
+                  logger.debug('Closing live connection')
                   await llm_connection.close()
+                  logger.debug('Live connection closed.')
+                  # transfer to the sub agent.
+                  transfer_to_agent = event.actions.transfer_to_agent
+                  if transfer_to_agent:
+                    logger.debug('Transferring to agent: %s', transfer_to_agent)
+                    agent_to_run = self._get_agent_to_run(
+                        invocation_context, transfer_to_agent
+                    )
+                    async with Aclosing(
+                        agent_to_run.run_live(invocation_context)
+                    ) as agen:
+                      async for item in agen:
+                        yield item
                 if (
                     event.content
                     and event.content.parts
@@ -638,15 +662,6 @@ async def _postprocess_live(
         )
         yield final_event
 
-      transfer_to_agent = function_response_event.actions.transfer_to_agent
-      if transfer_to_agent:
-        agent_to_run = self._get_agent_to_run(
-            invocation_context, transfer_to_agent
-        )
-        async with Aclosing(agent_to_run.run_live(invocation_context)) as agen:
-          async for item in agen:
-            yield item
-
   async def _postprocess_run_processors_async(
       self, invocation_context: InvocationContext, llm_response: LlmResponse
   ) -> AsyncGenerator[Event, None]: