From 2204d8cb7d28b78e02f8192a1e9d8e77b0a19151 Mon Sep 17 00:00:00 2001
From: Anneli Samuel <82824520+anneli-samuel@users.noreply.github.com>
Date: Thu, 1 Aug 2024 19:39:04 +0300
Subject: [PATCH] community[patch]: Invoke on_llm_new_token callback before
 yielding chunk (#24938)

**Description**: Invoke on_llm_new_token callback before yielding chunk in
streaming mode

**Issue**: [#16913](https://github.com/langchain-ai/langchain/issues/16913)
---
 libs/community/langchain_community/chat_models/mlx.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/community/langchain_community/chat_models/mlx.py b/libs/community/langchain_community/chat_models/mlx.py
index 1bd9f740d782f..e3a28c73c7fce 100644
--- a/libs/community/langchain_community/chat_models/mlx.py
+++ b/libs/community/langchain_community/chat_models/mlx.py
@@ -186,9 +186,9 @@ def _stream(
             # yield text, if any
             if text:
                 chunk = ChatGenerationChunk(message=AIMessageChunk(content=text))
-                yield chunk
                 if run_manager:
                     run_manager.on_llm_new_token(text, chunk=chunk)
+                yield chunk
 
             # break if stop sequence found
             if token == eos_token_id or (stop is not None and text in stop):
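
Why the ordering matters: once a generator yields, control passes to the consumer, which may process the chunk (or stop iterating altogether) before the generator body resumes, so firing `on_llm_new_token` first guarantees callback handlers observe each token before any downstream consumer does. Below is a minimal, self-contained sketch of this pattern, not the LangChain API; `stream_tokens` and `on_new_token` are hypothetical names used only for illustration:

```python
from typing import Callable, Iterator, List


def stream_tokens(
    tokens: List[str], on_new_token: Callable[[str], None]
) -> Iterator[str]:
    """Toy streaming generator mirroring the fixed ordering in `_stream`."""
    for token in tokens:
        # Fire the callback BEFORE yielding: once the generator yields,
        # control passes to the consumer, which may process the chunk (or
        # stop iterating entirely) before this function body resumes.
        on_new_token(token)
        yield token


events: List[str] = []
for tok in stream_tokens(["a", "b"], on_new_token=lambda t: events.append(f"cb:{t}")):
    events.append(f"consumed:{tok}")

# Callback-first ordering: every "cb:<token>" precedes its "consumed:<token>".
assert events == ["cb:a", "consumed:a", "cb:b", "consumed:b"]
```

With the pre-fix ordering (`yield` before the callback), the events would interleave the other way around, and a consumer that breaks out of the loop early would leave the final token's callback unfired, since the generator is suspended at the `yield` and never resumes.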