Support automatic model configuration on agents

jlowin · jlowin · commit 69ef1d1c27dd · 2024-09-12T08:51:38.000-04:00
diff --git a/docs/concepts/agents.mdx b/docs/concepts/agents.mdx
@@ -31,7 +31,6 @@ A more complex agent can be created by providing additional configuration. This
 
 ```python
 import controlflow as cf
-from langchain_openai import ChatOpenAI
 
 agent = cf.Agent(
     name="Data Analyst",
@@ -41,7 +40,7 @@ agent = cf.Agent(
         "Browse the web for data and use Python to analyze it."
     ),
     tools=[cf.tools.web.get_url, cf.tools.code.python],
-    model=ChatpOpenAI('gpt-4o-mini'),
+    model="openai/gpt-4o",
     interactive=True,
 )
 ```
diff --git a/docs/examples/call-routing.mdx b/docs/examples/call-routing.mdx
@@ -17,7 +17,6 @@ As you run this example, you'll see the conversation unfold in real-time, culmin
 
 ```python
 import random
-from enum import Enum
 import controlflow as cf
 
 DEPARTMENTS = [
diff --git a/docs/examples/features/multi-llm.mdx b/docs/examples/features/multi-llm.mdx
@@ -16,17 +16,12 @@ In this scenario, we'll create a workflow that analyzes customer feedback for a
 
 ```python
 import controlflow as cf
-from langchain_openai import ChatOpenAI
 from pydantic import BaseModel
 from typing import Literal
 
-# Define our models
-gpt4_mini = ChatOpenAI(model="gpt-4o-mini")
-gpt4 = ChatOpenAI(model="gpt-4o")
-
 # Create specialized agents
-classifier = cf.Agent(name="Classifier", model=gpt4_mini)
-summarizer = cf.Agent(name="Summarizer", model=gpt4)
+classifier = cf.Agent(name="Classifier", model="openai/gpt-4o-mini")
+summarizer = cf.Agent(name="Summarizer", model="openai/gpt-4o")
 
 # Define our data models
 class Feedback(BaseModel):
diff --git a/docs/examples/headline-categorization.mdx b/docs/examples/headline-categorization.mdx
@@ -12,9 +12,8 @@ The following code creates a function that classifies a given news headline into
 
 ```python
 import controlflow as cf
-from langchain_openai import ChatOpenAI
 
-classifier = cf.Agent(model=ChatOpenAI(model="gpt-4o-mini"))
+classifier = cf.Agent(model="openai/gpt-4o-mini")
 
 def classify_news(headline: str) -> str:
     return cf.run(
@@ -57,7 +56,7 @@ This implementation showcases several important ControlFlow features that enable
 1. **[Agents](/concepts/agents)**: We create an agent with a specific LLM model (GPT-4o mini) to perform the headline classification.
 
    ```python
-   classifier = cf.Agent(model=ChatOpenAI(model="gpt-4o-mini"))
+   classifier = cf.Agent(model="openai/gpt-4o-mini")
    ```
 
 2. **[Result types](/concepts/tasks/task-results)**: We use a list of strings as the `result_type` to constrain the output to one of the predefined categories. This ensures that the classification result is always one of the specified options.
diff --git a/docs/examples/named-entity-recognition.mdx b/docs/examples/named-entity-recognition.mdx
@@ -12,12 +12,11 @@ First, let's implement a function that extracts a simple list of entities:
 
 ```python
 import controlflow as cf
-from langchain_openai import ChatOpenAI
 from typing import List
 
 extractor = cf.Agent(
     name="Named Entity Recognizer",
-    model=ChatOpenAI(model="gpt-4o-mini"),
+    model="openai/gpt-4o-mini",
 )
 
 def extract_entities(text: str) -> List[str]:
@@ -86,7 +85,7 @@ This implementation showcases several important ControlFlow features that enable
    ```python
    extractor = cf.Agent(
        name="Named Entity Recognizer",
-       model=ChatOpenAI(model="gpt-4o-mini"),
+       model="openai/gpt-4o-mini",
    )
    ```
 
diff --git a/docs/examples/sentiment-classifier.mdx b/docs/examples/sentiment-classifier.mdx
@@ -13,9 +13,8 @@ The following code creates a function that classifies the sentiment of a given t
 ```python
 import controlflow as cf
 from controlflow.tasks.validators import between
-from langchain_openai import ChatOpenAI
 
-optimist = cf.Agent(model=ChatOpenAI(model="gpt-4o-mini"))
+optimist = cf.Agent(model="openai/gpt-4o-mini")
 
 def sentiment(text: str) -> float:
     return cf.run(
@@ -58,7 +57,7 @@ This implementation showcases several important ControlFlow features that enable
 1. **[Agents](/concepts/agents)**: We create an agent with a specific LLM model (GPT-4o mini) to perform the sentiment analysis.
 
    ```python
-   optimist = cf.Agent(model=ChatOpenAI(model="gpt-4o-mini"))
+   optimist = cf.Agent(model="openai/gpt-4o-mini")
    ```
 
 3. **[Result types](/concepts/tasks/task-results)**: We specify `result_type=float` to ensure the sentiment score is returned as a float value.
diff --git a/docs/guides/llms.mdx b/docs/guides/llms.mdx
@@ -19,38 +19,72 @@ Every ControlFlow agent can be assigned a specific LLM. When instantiating an ag
 ControlFlow agents can use any LangChain LLM class that supports chat-based APIs and tool calling. For a complete list of available models, settings, and instructions, please see LangChain's [LLM provider documentation](https://python.langchain.com/docs/integrations/chat/).
 
 <Tip>
-ControlFlow includes OpenAI and Azure OpenAI models by default. To use other models, you'll need to first install the corresponding LangChain package and supply any required credentials. See the model's [documentation](https://python.langchain.com/docs/integrations/chat/) for more information.
+ControlFlow includes the required packages for OpenAI, Azure OpenAI, and Anthropic models by default. To use other models, you'll need to first install the corresponding LangChain package and supply any required credentials. See the model's [documentation](https://python.langchain.com/docs/integrations/chat/) for more information.
 </Tip>
 
 
+### Automatic configuration
+
+ControlFlow can automatically load LLMs from certain providers when you pass a string as an agent's `model` parameter. The string must have the form `{provider key}/{model name}`.
+
+For example:
+```python
+import controlflow as cf
+
+openai_agent = cf.Agent(model="openai/gpt-4o-mini")
+anthropic_agent = cf.Agent(model="anthropic/claude-3-haiku-20240307")
+groq_agent = cf.Agent(model="groq/mixtral-8x7b-32768")
+```
+
+Note that loading a model from a string is convenient, but does not allow you to configure all of the model's parameters. For full control, see the docs on [manual configuration](#manual-configuration).
+
+At this time, supported providers for automatic configuration include:
+
+| Provider | Provider key | Required dependencies |
+| -------- | ----------------- | ----------------- |
+| OpenAI   | `openai`       | (included) |
+| Azure OpenAI | `azure-openai` | (included) |
+| Anthropic | `anthropic` | (included) |
+| Google   | `google`       | `langchain_google_genai` |
+| Groq      | `groq`       | `langchain_groq` |
+
+If the required dependencies are not installed, ControlFlow will be unable to load the model and will raise an error.
+
+
+### Manual configuration
+
+
 To configure a different LLM, follow these steps:
 <Steps>
 <Step title="Install required packages">
-To use an LLM, first make sure you have installed the appropriate provider package. ControlFlow only includes `langchain_openai` by default. For example, to use an Anthropic model, first run:
-```
-pip install langchain_anthropic
+To use an LLM, first make sure you have installed the appropriate [provider package](https://python.langchain.com/docs/integrations/chat/). For example, to use a Google model, run:
+
+```bash
+pip install langchain_google_genai
 ```
 </Step>
 <Step title="Configure API keys">
-You must provide the correct API keys and configuration for the LLM you want to use. These can be provided as environment variables or when you create the model in your script. For example, to use an Anthropic model, set the `ANTHROPIC_API_KEY` environment variable:
+You must provide the correct API keys and configuration for the LLM you want to use. These can be provided as environment variables or when you create the model in your script. For example, to use an OpenAI model, you must set the `OPENAI_API_KEY` environment variable:
 
+```bash
+export OPENAI_API_KEY=<your-api-key>
 ```
-export ANTHROPIC_API_KEY=<your-api-key>
-```
-For model-specific instructions, please refer to the provider's documentation.
+For model-specific instructions, please refer to the provider's [documentation](https://python.langchain.com/docs/integrations/chat/).
 </Step>
+
 <Step title="Create the model">
-Begin by creating the LLM object in your script. For example, to use Claude 3 Opus:
+Create the LLM object in your script, including any additional parameters. For example, to use Claude 3 Opus:
 
 ```python
 from langchain_anthropic import ChatAnthropic
 
 # create the model
 model = ChatAnthropic(model='claude-3-opus-20240229')
 ```
+
 </Step>
 <Step title="Pass the model to an agent">
-Next, create an agent with the specified model:
+Finally, configure an agent with the model:
 
 ```python
 import controlflow as cf
@@ -59,40 +93,8 @@ import controlflow as cf
 agent = cf.Agent(model=model)
 ```
 </Step>
-<Step title='Assign the agent to a task'>
-Finally, assign your agent to a task:
-
-```python
-# assign the agent to a task
-task = cf.Task('Write a short poem about LLMs', agents=[agent])
-
-# (optional) run the task
-task.run()
-```
-</Step>
 </Steps>
 
-<Accordion title="Click here to copy the entire example script">
-
-```python
-import controlflow as cf
-from langchain_anthropic import ChatAnthropic
-
-# create the model
-model = ChatAnthropic(model='claude-3-opus-20240229')
-
-# provide the model to an agent
-agent = cf.Agent(model=model)
-
-# assign the agent to a task
-task = cf.Task('Write a short poem about LLMs', agents=[agent])
-
-# (optional) run the task
-task.run()
-```
-</Accordion>
-
-### Model configuration
 
 In addition to choosing a specific model, you can also configure the model's parameters. For example, you can set the temperature for GPT-4o:
 
@@ -133,33 +135,25 @@ assert cf.Agent('Marvin').model.model_name == 'claude-3-opus-20240229'
 ```
 ### From a string setting
 
-You can also specify a default model using a string, which is convenient though it doesn't allow you to configure advanced model settings. The string must have the form `<provider>/<model name>`.
+You can also specify a default model using a string, which is convenient though it doesn't allow you to configure advanced model settings. This must be a string in the form `{provider key}/{model name}`, following the same guidelines as [automatic LLM configuration](#automatic-configuration).
 
 You can apply this setting either by using an environment variable before you import ControlFlow or in your script at runtime. For example, to use GPT-4o mini as the default model:
 
 <CodeGroup>
 ```bash Set an environment variable
-export CONTROLFLOW_LLM_MODEL=openai/gpt-3.5-turbo
+export CONTROLFLOW_LLM_MODEL=openai/gpt-4o-mini
 ```
 
 ```python Set a runtime variable
 import controlflow as cf
 # set the default model as a string
-cf.defaults.model = "openai/gpt-3.5-turbo"
+cf.defaults.model = "openai/gpt-4o-mini"
 
 # check that the default model is loaded
-assert cf.Agent('Marvin').model.model_name == 'gpt-3.5-turbo'
+assert cf.Agent('Marvin').model.model_name == 'gpt-4o-mini'
 ```
 </CodeGroup>
 
 <Note>
 The default model can only be set by environment variable before importing ControlFlow. Once ControlFlow is imported, it reads the `controlflow.settings.llm_model` value to create the default model object.
 </Note>
-
-
-At this time, setting the default model via string is only supported for the following providers:
-- `openai`
-- `azure-openai`
-- `anthropic`
-- `google`
-- `groq`
diff --git a/docs/patterns/running-tasks.mdx b/docs/patterns/running-tasks.mdx
@@ -207,10 +207,9 @@ We can also use the `Moderated` strategy to have a more powerful model orchestra
 
 ```python Moderated
 import controlflow as cf
-from langchain_openai import ChatOpenAI
 
-optimist = cf.Agent(name="Optimist", model=ChatOpenAI(model="gpt-4o-mini"))
-pessimist = cf.Agent(name="Pessimist", model=ChatOpenAI(model="gpt-4o-mini"))
+optimist = cf.Agent(name="Optimist", model="openai/gpt-4o-mini")
+pessimist = cf.Agent(name="Pessimist", model="openai/gpt-4o-mini")
 moderator = cf.Agent(name="Moderator")
 
 cf.run(
diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx
@@ -114,15 +114,13 @@ This example uses an OpenAI model, but you can use any LangChain-compatible LLM
 </Warning>
 
 <CodeGroup>
-```python Code
-from langchain_openai import ChatOpenAI
-from enum import Enum
+```python Code
 
 
 # Create a specialized agent 
 classifier = cf.Agent(
     name="Email Classifier",
-    model=ChatOpenAI(model="gpt-4o-mini"),
+    model="openai/gpt-4o-mini",
     instructions="You are an expert at quickly classifying emails.",
 )
 
@@ -166,21 +164,19 @@ A flow provides a shared context and history for all agents, even across multipl
 <CodeGroup>
 ```python Code
 import controlflow as cf
-from langchain_openai import ChatOpenAI
-from enum import Enum
 
 
 # Create agents
 classifier = cf.Agent(
     name="Email Classifier",
-    model=ChatOpenAI(model="gpt-4o-mini"),
+    model="openai/gpt-4o-mini",
     instructions="You are an expert at quickly classifying emails. Always "
                  "respond with exactly one word: either 'important' or 'spam'."
 )
 
 responder = cf.Agent(
     name="Email Responder",
-    model=ChatOpenAI(model="gpt-4o"),
+    model="openai/gpt-4o",
     instructions="You are an expert at crafting professional email responses. "
                  "Your replies should be concise but friendly."
 )
diff --git a/pyproject.toml b/pyproject.toml
@@ -52,6 +52,8 @@ tests = [
     "pytest-timeout",
     "pytest-xdist",
     "langchain_community",
+    "langchain_google_genai",
+    "langchain_groq",
     "duckduckgo-search",
 ]
 dev = [
diff --git a/src/controlflow/agents/agent.py b/src/controlflow/agents/agent.py
@@ -9,6 +9,7 @@
     AsyncGenerator,
     Generator,
     Optional,
+    Union,
 )
 
 from langchain_core.language_models import BaseChatModel
@@ -19,6 +20,7 @@
 from controlflow.events.base import Event
 from controlflow.instructions import get_instructions
 from controlflow.llm.messages import AIMessage, BaseMessage
+from controlflow.llm.models import get_model as get_model_from_string
 from controlflow.llm.rules import LLMRules
 from controlflow.tools.tools import (
     Tool,
@@ -78,9 +80,9 @@ class Agent(ControlFlowModel, abc.ABC):
 
     # note: `model` should be typed as Optional[BaseChatModel] but V2 models can't have
     # V1 attributes without erroring, so we have to use Any.
-    model: Optional[Any] = Field(
+    model: Optional[Union[str, Any]] = Field(
         None,
-        description="The LangChain BaseChatModel used by the agent. If not provided, the default model will be used.",
+        description="The LangChain BaseChatModel used by the agent. If not provided, the default model will be used. A compatible string can be passed to automatically retrieve the model.",
         exclude=True,
     )
 
@@ -130,6 +132,12 @@ def _generate_id(self):
     def _validate_tools(cls, tools: list[Tool]):
         return as_tools(tools or [])
 
+    @field_validator("model", mode="before")
+    def _validate_model(cls, model: Optional[Union[str, Any]]):
+        if isinstance(model, str):
+            return get_model_from_string(model)
+        return model
+
     @field_serializer("tools")
     def _serialize_tools(self, tools: list[Tool]):
         tools = controlflow.tools.as_tools(tools)
diff --git a/src/controlflow/defaults.py b/src/controlflow/defaults.py
@@ -10,7 +10,7 @@
 
 from .agents import Agent
 from .events.history import History, InMemoryHistory
-from .llm.models import _get_initial_default_model, model_from_string
+from .llm.models import _get_initial_default_model, get_model
 
 __all__ = ["defaults"]
 
@@ -43,7 +43,7 @@ def __repr__(self) -> str:
     @field_validator("model")
     def _model(cls, v):
         if isinstance(v, str):
-            v = model_from_string(v)
+            v = get_model(v)
         elif v is not None and not isinstance(v, BaseChatModel):
             raise ValueError("Input must be an instance of BaseChatModel")
         return v
diff --git a/src/controlflow/llm/models.py b/src/controlflow/llm/models.py
diff --git a/src/controlflow/planning/auto_tasks.py b/src/controlflow/planning/auto_tasks.py
diff --git a/tests/agents/test_agents.py b/tests/agents/test_agents.py
diff --git a/tests/llm/__init__.py b/tests/llm/__init__.py
diff --git a/tests/llm/test_models.py b/tests/llm/test_models.py

Original file line number	Diff line number	Diff line change
`@@ -52,6 +52,8 @@ tests = [`
`52`	`52`	`"pytest-timeout",`
`53`	`53`	`"pytest-xdist",`
`54`	`54`	`"langchain_community",`
	`55`	`+ "langchain_google_genai",`
	`56`	`+ "langchain_groq",`
`55`	`57`	`"duckduckgo-search",`
`56`	`58`	`]`
`57`	`59`	`dev = [`