Merge branch 'main' into ODSC-63450/oci_odsc_llm
mrDzurb authored Feb 2, 2025
2 parents fc8489a + 4c81fff commit bac8343
Showing 36 changed files with 597 additions and 228 deletions.
48 changes: 48 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,53 @@
# ChangeLog

## [2025-01-31]

### `llama-index-core` [0.12.15]

- Add error_on_tool_error param to FunctionCallingLLM.predict_and_call (#17663)
- Get tool description from pydantic field (#17679)
- fix: make ctx._events_buffer json-serializable (#17676)
- feat: allow to exclude empty file simple directory reader (#17656)
- improve markdown llm output parsing (#17577)
- small typo fix in the default plan refine prompt (#17644)

### `llama-index-agent-openai` [0.4.3]

- fix repeated sources when doing parallel tool calling (#17645)

### `llama-index-embeddings-text-embeddings-inference` [0.3.2]

- Add endpoint parameter to TextEmbeddingsInference (#17598)

### `llama-index-llms-bedrock-converse` [0.4.5]

- fix bedrock function calling (#17658)

### `llama-index-llms-cortex` [0.1.0]

- Add Snowflake Cortex Integration (#17585)

### `llama-index-llms-fireworks` [0.3.2]

- Deepseek-r1 is now supported by fireworks (#17657)
- Deepseek-v3 is now supported by fireworks (#17518)

### `llama-index-llms-gemini` [0.4.5]

- adding the chat decorators to async calls (#17678)

### `llama-index-llms-llama-cpp` [0.4.0]

- update llama-cpp integration + docs (#17647)

### `llama-index-vector-stores-azureaisearch` [0.3.3]

- Feat/fix Azure AI Search Hybrid Semantic Search Unusability due to hardcoded parameter (#17683)

### `llama-index-vector-stores-pinecone` [0.4.4]

- `get_nodes()` now accepts include_values param to return embeddings (#17635)

## [2025-01-25]

### `llama-index-core` [0.12.14]
10 changes: 7 additions & 3 deletions CONTRIBUTING.md
@@ -172,15 +172,19 @@ LlamaIndex is organized as a **monorepo**, meaning different packages live withi
```bash
curl -sSL https://install.python-poetry.org | python3 -
```
2. Activate the environment:
2. Install the Poetry shell plugin (if you don't already have it):
```bash
poetry self add poetry-plugin-shell
```
3. Activate the environment:
```bash
poetry shell
```
3. Install dependencies:
4. Install dependencies:
```bash
poetry install --only dev,docs --no-root
```
4. Install the package(s) you want to work on. You will for sure need to install `llama-index-core`:
5. Install the package(s) you want to work on. You will for sure need to install `llama-index-core`:

```bash
pip install -e llama-index-core
48 changes: 48 additions & 0 deletions docs/docs/CHANGELOG.md
@@ -1,5 +1,53 @@
# ChangeLog

## [2025-01-31]

### `llama-index-core` [0.12.15]

- Add error_on_tool_error param to FunctionCallingLLM.predict_and_call (#17663)
- Get tool description from pydantic field (#17679)
- fix: make ctx._events_buffer json-serializable (#17676)
- feat: allow to exclude empty file simple directory reader (#17656)
- improve markdown llm output parsing (#17577)
- small typo fix in the default plan refine prompt (#17644)

### `llama-index-agent-openai` [0.4.3]

- fix repeated sources when doing parallel tool calling (#17645)

### `llama-index-embeddings-text-embeddings-inference` [0.3.2]

- Add endpoint parameter to TextEmbeddingsInference (#17598)

### `llama-index-llms-bedrock-converse` [0.4.5]

- fix bedrock function calling (#17658)

### `llama-index-llms-cortex` [0.1.0]

- Add Snowflake Cortex Integration (#17585)

### `llama-index-llms-fireworks` [0.3.2]

- Deepseek-r1 is now supported by fireworks (#17657)
- Deepseek-v3 is now supported by fireworks (#17518)

### `llama-index-llms-gemini` [0.4.5]

- adding the chat decorators to async calls (#17678)

### `llama-index-llms-llama-cpp` [0.4.0]

- update llama-cpp integration + docs (#17647)

### `llama-index-vector-stores-azureaisearch` [0.3.3]

- Feat/fix Azure AI Search Hybrid Semantic Search Unusability due to hardcoded parameter (#17683)

### `llama-index-vector-stores-pinecone` [0.4.4]

- `get_nodes()` now accepts include_values param to return embeddings (#17635)

## [2025-01-25]

### `llama-index-core` [0.12.14]
10 changes: 7 additions & 3 deletions docs/docs/CONTRIBUTING.md
@@ -172,15 +172,19 @@ LlamaIndex is organized as a **monorepo**, meaning different packages live withi
```bash
curl -sSL https://install.python-poetry.org | python3 -
```
2. Activate the environment:
2. Install the Poetry shell plugin (if you don't already have it):
```bash
poetry self add poetry-plugin-shell
```
3. Activate the environment:
```bash
poetry shell
```
3. Install dependencies:
4. Install dependencies:
```bash
poetry install --only dev,docs --no-root
```
4. Install the package(s) you want to work on. You will for sure need to install `llama-index-core`:
5. Install the package(s) you want to work on. You will for sure need to install `llama-index-core`:

```bash
pip install -e llama-index-core
4 changes: 4 additions & 0 deletions docs/docs/api_reference/llms/cortex.md
@@ -0,0 +1,4 @@
::: llama_index.llms.cortex
options:
members:
- Cortex
4 changes: 3 additions & 1 deletion docs/docs/examples/agent/agent_workflow_multi.ipynb
@@ -6,9 +6,11 @@
"source": [
"# Multi-Agent Research Workflow with AgentWorkflow\n",
"\n",
"<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/docs/examples/agent/agent_workflow_multi.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
"\n",
"In this notebook, we will explore how to use the `AgentWorkflow` class to create multi-agent systems. Specifically, we will create a system that can generate a report on a given topic.\n",
"\n",
"This notebook will assume that you have already either read the [basic agent workflow notebook](https://docs.llamaindex.ai/en/stable/examples/agent/agent_workflow_basic.ipynb) or the [agent workflow documentation](https://docs.llamaindex.ai/en/stable/understanding/agent/multi_agents.md)."
"This notebook will assume that you have already either read the [basic agent workflow notebook](https://docs.llamaindex.ai/en/stable/examples/agent/agent_workflow_basic) or the [agent workflow documentation](https://docs.llamaindex.ai/en/stable/understanding/agent/multi_agents.md)."
]
},
{
2 changes: 1 addition & 1 deletion docs/docs/examples/retrievers/bm25_retriever.ipynb
@@ -621,7 +621,7 @@
"source": [
"storage_context.docstore.persist(\"./docstore.json\")\n",
"\n",
"# or, we could ignore the docstore and just persist the bm25 retriever as shown above\n",
"# or, we could ignore the docstore and just persist the bm25 retriever as shown below\n",
"# bm25_retriever.persist(\"./bm25_retriever\")"
]
},
2 changes: 1 addition & 1 deletion docs/docs/module_guides/workflow/index.md
@@ -274,7 +274,7 @@ class MyWorkflow(Workflow):
self, ctx: Context, ev: GatherEvent | MyEventResult
) -> StopEvent | None:
# wait for events to finish
events = ctx.collect_events([MyEventResult, MyEventResult])
events = ctx.collect_events(ev, [MyEventResult, MyEventResult])
if not events:
return None

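The corrected `collect_events` call takes the triggering event as its first argument. A minimal sketch of the surrounding step, assembled from the example in that guide (the `GatherEvent` and `MyEventResult` event classes are assumed to be defined as on that page):

```python
from llama_index.core.workflow import Context, Event, StopEvent, Workflow, step


class MyEventResult(Event):
    result: str


class GatherEvent(Event):
    pass


class MyWorkflow(Workflow):
    @step
    async def gather(
        self, ctx: Context, ev: GatherEvent | MyEventResult
    ) -> StopEvent | None:
        # The triggering event is passed first; collect_events returns None
        # until both expected MyEventResult events have been buffered.
        events = ctx.collect_events(ev, [MyEventResult, MyEventResult])
        if not events:
            return None
        return StopEvent(result=[e.result for e in events])
```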
4 changes: 3 additions & 1 deletion docs/mkdocs.yml
@@ -350,9 +350,9 @@ nav:
- ./examples/llm/langchain.ipynb
- ./examples/llm/litellm.ipynb
- ./examples/llm/llama_2.ipynb
- ./examples/llm/llama_cpp.ipynb
- ./examples/llm/llama_2_rap_battle.ipynb
- ./examples/llm/llama_api.ipynb
- ./examples/llm/llama_cpp.ipynb
- ./examples/llm/llamafile.ipynb
- ./examples/llm/llm_predictor.ipynb
- ./examples/llm/lmstudio.ipynb
@@ -1004,6 +1004,7 @@ nav:
- ./api_reference/llms/clarifai.md
- ./api_reference/llms/cleanlab.md
- ./api_reference/llms/cohere.md
- ./api_reference/llms/cortex.md
- ./api_reference/llms/custom_llm.md
- ./api_reference/llms/dashscope.md
- ./api_reference/llms/databricks.md
@@ -2344,6 +2345,7 @@ plugins:
- ../llama-index-integrations/llms/llama-index-llms-stepfun
- ../llama-index-integrations/tools/llama-index-tools-linkup-research
- ../llama-index-integrations/llms/llama-index-llms-deepseek
- ../llama-index-integrations/llms/llama-index-llms-cortex
- redirects:
redirect_maps:
./api/llama_index.vector_stores.MongoDBAtlasVectorSearch.html: api_reference/storage/vector_store/mongodb.md
2 changes: 1 addition & 1 deletion llama-index-core/llama_index/core/__init__.py
@@ -1,6 +1,6 @@
"""Init file of LlamaIndex."""

__version__ = "0.12.14"
__version__ = "0.12.15"

import logging
from logging import NullHandler
@@ -232,31 +232,44 @@ async def init_run(self, ctx: Context, ev: StartEvent) -> AgentInput:
"""Sets up the workflow and validates inputs."""
await self._init_context(ctx, ev)

user_msg = ev.get("user_msg")
chat_history = ev.get("chat_history")
if user_msg and chat_history:
raise ValueError("Cannot provide both user_msg and chat_history")
user_msg: Optional[Union[str, ChatMessage]] = ev.get("user_msg")
chat_history: Optional[List[ChatMessage]] = ev.get("chat_history", [])

# Convert string user_msg to ChatMessage
if isinstance(user_msg, str):
user_msg = ChatMessage(role="user", content=user_msg)

await ctx.set("user_msg_str", user_msg.content)

# Add messages to memory
memory: BaseMemory = await ctx.get("memory")

# First set chat history if it exists
if chat_history:
memory.set(chat_history)

# Then add user message if it exists
current_state = await ctx.get("state")
if user_msg:
# Add the state to the user message if it exists and if requested
current_state = await ctx.get("state")
# Add the state to the user message if it exists
if current_state:
user_msg.content = self.state_prompt.format(
state=current_state, msg=user_msg.content
)

await memory.aput(user_msg)
input_messages = memory.get(input=user_msg.content)
await ctx.set("user_msg_str", user_msg.content)
elif chat_history:
# If no user message, use the last message from chat history as user_msg_str
last_msg = chat_history[-1].content or ""
await ctx.set("user_msg_str", last_msg)

if current_state:
chat_history[-1].content = self.state_prompt.format(
state=current_state, msg=chat_history[-1].content
)
else:
memory.set(chat_history)
input_messages = memory.get()
raise ValueError("Must provide either user_msg or chat_history")

# Get all messages from memory
input_messages = memory.get()

# send to the current agent
current_agent_name: str = await ctx.get("current_agent_name")
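The reworked `init_run` step now seeds memory from `chat_history`, appends `user_msg` on top when both are given, and raises only when neither is provided. A hedged sketch of how this is exercised from the caller's side (the `OpenAI` integration and the toy `add` tool are illustrative assumptions, not part of this commit):

```python
import asyncio

from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.llms import ChatMessage
from llama_index.llms.openai import OpenAI  # assumed integration; any function-calling LLM works


def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


workflow = AgentWorkflow.from_tools_or_functions([add], llm=OpenAI(model="gpt-4o-mini"))

history = [
    ChatMessage(role="user", content="We are adding small numbers today."),
    ChatMessage(role="assistant", content="Understood."),
]


async def main() -> None:
    # 1) Plain user message: converted to a ChatMessage and stored in memory.
    print(await workflow.run(user_msg="What is 40 + 2?"))

    # 2) Prior history plus a new user message: the history seeds memory first,
    #    then the user message is appended (both are now accepted together).
    print(await workflow.run(user_msg="Add 10 to that.", chat_history=history))

    # 3) Chat history only: the last message's content becomes user_msg_str.
    print(await workflow.run(chat_history=history))

    # Passing neither raises ValueError("Must provide either user_msg or chat_history").


asyncio.run(main())
```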
22 changes: 20 additions & 2 deletions llama-index-core/llama_index/core/llms/function_calling.py
@@ -161,6 +161,7 @@ def predict_and_call(
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
error_on_no_tool_call: bool = True,
error_on_tool_error: bool = False,
**kwargs: Any,
) -> "AgentChatResponse":
"""Predict and call the tool."""
@@ -193,7 +194,15 @@
call_tool_with_selection(tool_call, tools, verbose=verbose)
for tool_call in tool_calls
]
if allow_parallel_tool_calls:
tool_outputs_with_error = [
tool_output for tool_output in tool_outputs if tool_output.is_error
]
if error_on_tool_error and len(tool_outputs_with_error) > 0:
error_text = "\n\n".join(
[tool_output.content for tool_output in tool_outputs]
)
raise ValueError(error_text)
elif allow_parallel_tool_calls:
output_text = "\n\n".join(
[tool_output.content for tool_output in tool_outputs]
)
@@ -218,6 +227,7 @@ async def apredict_and_call(
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
error_on_no_tool_call: bool = True,
error_on_tool_error: bool = False,
**kwargs: Any,
) -> "AgentChatResponse":
"""Predict and call the tool."""
@@ -252,7 +262,15 @@
for tool_call in tool_calls
]
tool_outputs = await asyncio.gather(*tool_tasks)
if allow_parallel_tool_calls:
tool_outputs_with_error = [
tool_output for tool_output in tool_outputs if tool_output.is_error
]
if error_on_tool_error and len(tool_outputs_with_error) > 0:
error_text = "\n\n".join(
[tool_output.content for tool_output in tool_outputs]
)
raise ValueError(error_text)
elif allow_parallel_tool_calls:
output_text = "\n\n".join(
[tool_output.content for tool_output in tool_outputs]
)
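A hedged sketch of the new `error_on_tool_error` flag on `predict_and_call` (the `OpenAI` LLM and the failing `divide` tool are illustrative assumptions; any `FunctionCallingLLM` should behave the same way):

```python
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI  # assumed integration; any FunctionCallingLLM works


def divide(a: float, b: float) -> float:
    """Divide a by b."""
    return a / b  # raises ZeroDivisionError when b == 0


tool = FunctionTool.from_defaults(fn=divide)
llm = OpenAI(model="gpt-4o-mini")

# Default behaviour: a failing tool call is reported in the response text.
print(llm.predict_and_call([tool], "What is 1 divided by 0?").response)

# With error_on_tool_error=True, the same failure is re-raised as a ValueError.
try:
    llm.predict_and_call([tool], "What is 1 divided by 0?", error_on_tool_error=True)
except ValueError as exc:
    print("tool call failed:", exc)
```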
2 changes: 2 additions & 0 deletions llama-index-core/llama_index/core/tools/utils.py
@@ -42,6 +42,8 @@ def create_schema_from_function(
param_type = args[0]
if isinstance(args[1], str):
description = args[1]
elif isinstance(args[1], FieldInfo):
description = args[1].description

if param_type is params[param_name].empty:
param_type = Any
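The new `FieldInfo` branch lets per-argument descriptions come from `Annotated[..., Field(...)]` rather than a plain string. A small sketch of the assumed usage (the `multiply` tool is illustrative):

```python
from typing import Annotated

from pydantic import Field

from llama_index.core.tools import FunctionTool


def multiply(
    a: Annotated[int, Field(description="The first factor")],
    b: Annotated[int, Field(description="The second factor")],
) -> int:
    """Multiply two integers."""
    return a * b


tool = FunctionTool.from_defaults(multiply)
# The generated schema now carries the Field descriptions for `a` and `b`.
print(tool.metadata.fn_schema.model_json_schema())
```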
17 changes: 10 additions & 7 deletions llama-index-core/llama_index/core/workflow/context.py
@@ -1,14 +1,14 @@
import asyncio
import json
import warnings
import uuid
import warnings
from collections import defaultdict
from typing import Dict, Any, Optional, List, Type, TYPE_CHECKING, Set, Tuple, TypeVar
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Type, TypeVar

from .context_serializers import BaseSerializer, JsonSerializer
from .decorators import StepConfig
from .events import Event
from .errors import WorkflowRuntimeError
from .events import Event

if TYPE_CHECKING: # pragma: no cover
from .workflow import Workflow
@@ -60,7 +60,7 @@ def __init__(
self._lock = asyncio.Lock()
self._globals: Dict[str, Any] = {}
# Step-specific instance
self._events_buffer: Dict[Type[Event], List[Event]] = defaultdict(list)
self._events_buffer: Dict[str, List[Event]] = defaultdict(list)

def _serialize_queue(self, queue: asyncio.Queue, serializer: BaseSerializer) -> str:
queue_items = list(queue._queue) # type: ignore
@@ -240,14 +240,17 @@ def session(self) -> "Context":
warnings.warn(msg, DeprecationWarning)
return self

def _get_full_path(self, ev_type: Type[Event]) -> str:
return f"{ev_type.__module__}.{ev_type.__name__}"

def collect_events(
self, ev: Event, expected: List[Type[Event]]
) -> Optional[List[Event]]:
self._events_buffer[type(ev)].append(ev)
self._events_buffer[self._get_full_path(type(ev))].append(ev)

retval: List[Event] = []
for e_type in expected:
e_instance_list = self._events_buffer.get(e_type)
e_instance_list = self._events_buffer.get(self._get_full_path(e_type))
if e_instance_list:
retval.append(e_instance_list.pop(0))

@@ -256,7 +259,7 @@

# put back the events if unable to collect all
for ev in retval:
self._events_buffer[type(ev)].append(ev)
self._events_buffer[self._get_full_path(type(ev))].append(ev)

return None

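Keying `_events_buffer` by the event class's dotted path rather than the class object is what makes the buffer JSON-serializable. A standalone illustration of the idea (standard library only; the `MyEventResult` stand-in is illustrative):

```python
import json
from collections import defaultdict
from typing import Any, Dict, List


class MyEventResult:
    """Stand-in for a workflow Event subclass."""


def get_full_path(ev_type: type) -> str:
    # Same formatting as Context._get_full_path: "module.ClassName"
    return f"{ev_type.__module__}.{ev_type.__name__}"


buffer: Dict[str, List[Any]] = defaultdict(list)
buffer[get_full_path(MyEventResult)].append({"result": 42})

# String keys serialize cleanly, e.g. {"__main__.MyEventResult": 1} ...
print(json.dumps({k: len(v) for k, v in buffer.items()}))

# ... whereas keying by the class object itself would fail:
# json.dumps({MyEventResult: 1})  -> TypeError: keys must be str, int, ...
```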