Merge pull request #249 from PrefectHQ/options

jlowin · web-flow · commit ba2338c5ccf4 · 2024-08-02T16:00:06.000-04:00
Better support for complex options
diff --git a/docs/patterns/result-types.mdx b/docs/patterns/result-types.mdx
@@ -71,7 +71,7 @@ assert result is False
 
 ## Constrained Choices
 
-Sometimes you want to limit the possible results to a specific set of values. You can do this by specifying a list of allowed values for the result type:
+Sometimes you want to limit the possible results to a specific set of values, in order to label or classify a response. You can do this by specifying a list of allowed values for the result type:
 
 ```python
 import controlflow as cf
diff --git a/src/controlflow/orchestration/orchestrator.py b/src/controlflow/orchestration/orchestrator.py
@@ -7,16 +7,11 @@
 
 import controlflow
 from controlflow.agents.agent import BaseAgent
-from controlflow.agents.teams import Team
 from controlflow.events.base import Event
 from controlflow.flows import Flow
 from controlflow.orchestration.agent_context import AgentContext
 from controlflow.orchestration.handler import Handler
 from controlflow.tasks.task import Task
-from controlflow.tools.orchestration import (
-    create_task_fail_tool,
-    create_task_success_tool,
-)
 from controlflow.tools.tools import Tool
 from controlflow.utilities.general import ControlFlowModel
 from controlflow.utilities.prefect import prefect_task as prefect_task
@@ -64,6 +59,8 @@ def _handlers(cls, v):
 
     @field_validator("agents", mode="before")
     def _agents(cls, v):
+        from controlflow.agents.teams import Team
+
         if v is None:
             v = {}
 
@@ -205,6 +202,6 @@ def get_tools(self, tasks: list[Task]) -> list[Tool]:
         tools.extend(self.flow.tools)
         for task in tasks:
             tools.extend(task.get_tools())
-            tools.append(create_task_success_tool(task=task))
-            tools.append(create_task_fail_tool(task=task))
+            tools.append(task.create_success_tool())
+            tools.append(task.create_fail_tool())
         return tools
diff --git a/src/controlflow/tasks/task.py b/src/controlflow/tasks/task.py
@@ -6,7 +6,6 @@
     Any,
     Callable,
     GenericAlias,
-    Literal,
     Optional,
     TypeVar,
     Union,
@@ -25,7 +24,7 @@
 import controlflow
 from controlflow.agents import BaseAgent
 from controlflow.instructions import get_instructions
-from controlflow.tools import Tool
+from controlflow.tools import Tool, tool
 from controlflow.tools.talk_to_user import talk_to_user
 from controlflow.utilities.context import ctx
 from controlflow.utilities.general import (
@@ -100,10 +99,10 @@ class Task(ControlFlowModel):
     )
     status: TaskStatus = TaskStatus.PENDING
     result: T = None
-    result_type: Union[type[T], GenericAlias, _LiteralGenericAlias, None] = Field(
+    result_type: Union[type[T], GenericAlias, tuple, None] = Field(
         str,
         description="The expected type of the result. This should be a type"
-        ", generic alias, BaseModel subclass, pd.DataFrame, or pd.Series. "
+        ", generic alias, BaseModel subclass, or list of choices. "
         "Can be None if no result is expected or the agent should communicate internally.",
     )
     error: Union[str, None] = None
@@ -264,9 +263,11 @@ def _default_parent(cls, v):
         return v
 
     @field_validator("result_type", mode="before")
-    def _turn_list_into_literal_result_type(cls, v):
+    def _ensure_result_type_is_list_if_literal(cls, v):
+        """
+        Normalize ``result_type`` before field validation.
+
+        A ``Literal[...]`` alias is replaced by its ``__args__``, and any
+        list, tuple, or set is converted to a tuple. Every other value is
+        returned unchanged.
+        """
+        # unwrap Literal[...] into its arguments so it falls through to the
+        # collection handling below
         if isinstance(v, _LiteralGenericAlias):
+            v = v.__args__
         if isinstance(v, (list, tuple, set)):
-            return Literal[tuple(v)]  # type: ignore
+            v = tuple(v)
         return v
 
     @field_serializer("parent")
@@ -560,6 +561,85 @@ def generate_subtasks(self, instructions: str = None, agent: BaseAgent = None):
                 context=self.context,
             )
 
+    def create_success_tool(self) -> Tool:
+        """
+        Create an agent-compatible tool for marking this task as successful.
+
+        When ``result_type`` is a tuple of options, the tool accepts a single
+        integer index and the task result becomes the option stored at that
+        index (the original object, not its serialized form). Otherwise the
+        tool accepts a value annotated with ``result_type`` directly.
+
+        Returns:
+            A private tool named ``mark_task_{id}_successful``.
+
+        Raises:
+            ValueError: if ``result_type`` is neither a tuple nor None and
+                pydantic cannot build a schema for it.
+        """
+        # index -> original option; this is what gets stored as the result
+        options = {}
+        # index -> display form of the option, used only in prompts and errors
+        serialized_options = {}
+        instructions = None
+        result_schema = None
+
+        # if the result_type is a tuple of options, then we want the LLM to provide
+        # a single integer index instead of writing out the entire option
+        if isinstance(self.result_type, tuple):
+            result_schema = int
+            for i, option in enumerate(self.result_type):
+                options[i] = option
+                try:
+                    serialized = TypeAdapter(type(option)).dump_python(option)
+                except PydanticSchemaGenerationError:
+                    # no pydantic schema for this option's type; fall back to repr
+                    serialized = repr(option)
+                serialized_options[i] = serialized
+            options_str = "\n\n".join(
+                f"Option {i}: {option}" for i, option in serialized_options.items()
+            )
+            instructions = f"""
+                Provide a single integer as the result, corresponding to the index
+                of your chosen option. Your options are: {options_str}
+                """
+
+        # otherwise try to load the schema for the result type
+        elif self.result_type is not None:
+            try:
+                TypeAdapter(self.result_type)
+                result_schema = self.result_type
+            except PydanticSchemaGenerationError:
+                pass
+            if result_schema is None:
+                raise ValueError(
+                    f"Could not load or infer schema for result type {self.result_type}. "
+                    "Please use a custom type or add compatibility."
+                )
+
+        @tool(
+            name=f"mark_task_{self.id}_successful",
+            description=f"Mark task {self.id} as successful.",
+            instructions=instructions,
+            private=True,
+            include_return_description=False,
+        )
+        def succeed(result: result_schema) -> str:  # type: ignore
+            if self.is_successful():
+                raise ValueError(
+                    f"{self.friendly_name()} is already marked successful."
+                )
+            if options:
+                if result not in options:
+                    raise ValueError(
+                        f"Invalid option. Please choose one of {serialized_options}"
+                    )
+                # Hand back the original option rather than its serialized or
+                # repr form: tuple result types are validated by membership
+                # (`result in result_type`), which a dumped dict or a repr
+                # string would fail. NOTE(review): presumably mark_successful
+                # applies that validation - confirm.
+                result = options[result]
+            self.mark_successful(result=result)
+            return f"{self.friendly_name()} marked successful."
+
+        return succeed
+
+    def create_fail_tool(self) -> Tool:
+        """
+        Build the agent-facing tool that marks this task as failed.
+
+        The returned tool takes a single ``reason`` string, forwards it to
+        ``mark_failed``, and replies with a short confirmation message.
+        """
+        tool_name = f"mark_task_{self.id}_failed"
+        tool_description = f"Mark task {self.id} as failed. Only use when technical errors prevent success. Provide a detailed reason for the failure."
+
+        def fail(reason: str) -> str:
+            self.mark_failed(reason=reason)
+            return f"{self.friendly_name()} marked failed."
+
+        # applying the decorator factory by hand is equivalent to `@tool(...)`
+        make_tool = tool(
+            name=tool_name,
+            description=tool_description,
+            private=True,
+            include_return_description=False,
+        )
+        return make_tool(fail)
+
     # Deprecated ---------------------------
 
     @deprecated("Use Task.run(steps=1) instead.", version="0.9")
@@ -574,6 +654,11 @@ async def run_once_async(self, *args, **kwargs):
 def validate_result(result: Any, result_type: type[T]) -> T:
     if result_type is None and result is not None:
         raise ValueError("Task has result_type=None, but a result was provided.")
+    elif isinstance(result_type, tuple):
+        if result not in result_type:
+            raise ValueError(
+                f"Result {result} is not in the list of valid result types: {result_type}"
+            )
     elif result_type is not None:
         try:
             result = TypeAdapter(result_type).validate_python(result)
@@ -594,3 +679,22 @@ def validate_result(result: Any, result_type: type[T]) -> T:
         #     result = pd.Series(**result)
 
     return result
+
+
+def _generate_result_schema(result_type: type[T]) -> type[T]:
+    """
+    Return ``result_type`` if pydantic can build a schema for it.
+
+    ``None`` is passed through as ``None``. A type for which ``TypeAdapter``
+    raises ``PydanticSchemaGenerationError`` results in a ``ValueError``.
+    """
+    if result_type is None:
+        return None
+
+    # probe pydantic compatibility; the adapter itself is discarded
+    try:
+        TypeAdapter(result_type)
+    except PydanticSchemaGenerationError:
+        is_supported = False
+    else:
+        is_supported = True
+
+    # raise outside the except block so the ValueError is not chained
+    if not is_supported:
+        raise ValueError(
+            f"Could not load or infer schema for result type {result_type}. "
+            "Please use a custom type or add compatibility."
+        )
+    return result_type
diff --git a/src/controlflow/tools/orchestration.py b/src/controlflow/tools/orchestration.py
diff --git a/tests/tasks/test_tasks.py b/tests/tasks/test_tasks.py
@@ -312,3 +312,47 @@ def test_custom_templated_prompt(self, agent_context):
         task = SimpleTask(prompt="{{ task.objective }}", objective="abc")
         prompt = task.get_prompt(context=agent_context)
         assert prompt == "abc"
+
+
+class TestResultType:
+    """Results passed to ``mark_successful`` are checked against ``result_type``."""
+
+    def test_int_result(self):
+        int_task = Task("choose 5", result_type=int)
+        int_task.mark_successful(result=5)
+        assert int_task.result == 5
+
+    def test_str_result(self):
+        str_task = Task("choose 5", result_type=str)
+        str_task.mark_successful(result="5")
+        assert str_task.result == "5"
+
+    def test_tuple_of_ints_result(self):
+        # a tuple result_type is a set of allowed choices
+        choice_task = Task("choose 5", result_type=(4, 5, 6))
+        choice_task.mark_successful(result=5)
+        assert choice_task.result == 5
+
+    def test_tuple_of_ints_validates(self):
+        # 7 is not one of the allowed choices
+        choice_task = Task("choose 5", result_type=(4, 5, 6))
+        with pytest.raises(ValueError):
+            choice_task.mark_successful(result=7)
+
+
+class TestSuccessTool:
+    """Behavior of the tool returned by ``Task.create_success_tool``."""
+
+    def test_success_tool(self):
+        task = Task("choose 5", result_type=int)
+        success_tool = task.create_success_tool()
+        success_tool.run(input={"result": 5})
+        assert task.is_successful()
+        assert task.result == 5
+
+    def test_success_tool_with_list_of_options(self):
+        # the tool takes the index of the option, not the option itself
+        task = Task('choose "good"', result_type=["bad", "good", "medium"])
+        success_tool = task.create_success_tool()
+        success_tool.run(input={"result": 1})
+        assert task.is_successful()
+        assert task.result == "good"
+
+    def test_success_tool_with_list_of_options_requires_int(self):
+        # passing the option text instead of its index must be rejected
+        task = Task('choose "good"', result_type=["bad", "good", "medium"])
+        success_tool = task.create_success_tool()
+        with pytest.raises(ValueError):
+            success_tool.run(input={"result": "good"})