diff --git a/environments/game_of_24/common/environment.py b/environments/game_of_24/common/environment.py
index 15d1d34..89ee533 100644
--- a/environments/game_of_24/common/environment.py
+++ b/environments/game_of_24/common/environment.py
@@ -93,8 +93,10 @@ def reset(
         observation, info = "", {"numbers": self.numbers}
 
         if options is not None and "trajectory" in options:
-            for action in options["trajectory"]:
-                assert isinstance(action, AgentAction)
+            for action, step in options["trajectory"]:
+                assert isinstance(
+                    action, AgentAction
+                ), f"Expected AgentAction, got {action}"
                 observation, reward, terminated, truncated, info = self.step(
                     (
                         action,
diff --git a/environments/game_of_24/reflexion.ipynb b/environments/game_of_24/reflexion.ipynb
index ae3d452..e426aff 100644
--- a/environments/game_of_24/reflexion.ipynb
+++ b/environments/game_of_24/reflexion.ipynb
@@ -22,16 +22,7 @@
   },
   {
    "cell_type": "code",
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from langchain_openai import ChatOpenAI\n",
     "from textwrap import dedent\n",
@@ -49,12 +40,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:03.458549Z",
-     "start_time": "2024-04-16T10:51:03.442485Z"
+     "end_time": "2024-04-17T17:04:56.043909Z",
+     "start_time": "2024-04-17T17:04:54.987345Z"
     }
    },
    "id": "b53511c71640dcea",
-   "execution_count": 20
+   "execution_count": 1
   },
   {
    "cell_type": "markdown",
@@ -77,12 +68,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:03.857486Z",
-     "start_time": "2024-04-16T10:51:03.839869Z"
+     "end_time": "2024-04-17T17:04:56.054020Z",
+     "start_time": "2024-04-17T17:04:56.044163Z"
     }
    },
    "id": "c6dea842a5c669ab",
-   "execution_count": 21
+   "execution_count": 2
   },
   {
    "cell_type": "markdown",
@@ -103,12 +94,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:04.243385Z",
-     "start_time": "2024-04-16T10:51:04.225496Z"
+     "end_time": "2024-04-17T17:04:56.064405Z",
+     "start_time": "2024-04-17T17:04:56.053612Z"
     }
    },
    "id": "6777295e88f644e7",
-   "execution_count": 22
+   "execution_count": 3
   },
   {
    "cell_type": "markdown",
@@ -135,12 +126,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:04.607952Z",
-     "start_time": "2024-04-16T10:51:04.592747Z"
+     "end_time": "2024-04-17T17:04:56.075442Z",
+     "start_time": "2024-04-17T17:04:56.064872Z"
     }
    },
    "id": "b98d2eebc3a91cbe",
-   "execution_count": 23
+   "execution_count": 4
   },
   {
    "cell_type": "markdown",
@@ -185,7 +176,7 @@
      "data": {
       "text/plain": "['openai-functions', 'openai-tools']"
      },
-     "execution_count": 24,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -199,21 +190,21 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:05.396321Z",
-     "start_time": "2024-04-16T10:51:05.371723Z"
+     "end_time": "2024-04-17T17:04:56.868634Z",
+     "start_time": "2024-04-17T17:04:56.850606Z"
     }
    },
    "id": "d436b68b58554bcc",
-   "execution_count": 24
+   "execution_count": 5
   },
   {
    "cell_type": "code",
    "outputs": [
     {
      "data": {
-      "text/plain": "<planning_library.function_calling_parsers.openai_functions_parser.OpenAIFunctionsParser at 0x176c8e7d0>"
+      "text/plain": "<planning_library.function_calling_parsers.openai_functions_parser.OpenAIFunctionsParser at 0x177d5e810>"
      },
-     "execution_count": 25,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -224,12 +215,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:05.593073Z",
-     "start_time": "2024-04-16T10:51:05.576423Z"
+     "end_time": "2024-04-17T17:04:57.172108Z",
+     "start_time": "2024-04-17T17:04:57.150558Z"
     }
    },
    "id": "dc62e0f88fd9e2f5",
-   "execution_count": 25
+   "execution_count": 6
   },
   {
    "cell_type": "code",
@@ -245,7 +236,7 @@
       "\u001B[0;31mKeyError\u001B[0m: 'openai-123'",
       "\nDuring handling of the above exception, another exception occurred:\n",
       "\u001B[0;31mValueError\u001B[0m                                Traceback (most recent call last)",
-      "Cell \u001B[0;32mIn[26], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m \u001B[43mParserRegistry\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_parser\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mopenai-123\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n",
+      "Cell \u001B[0;32mIn[7], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m \u001B[43mParserRegistry\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_parser\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mopenai-123\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n",
       "File \u001B[0;32m~/PycharmProjects/planning-library/planning_library/function_calling_parsers/parser_registry.py:25\u001B[0m, in \u001B[0;36mParserRegistry.get_parser\u001B[0;34m(cls, parser_name)\u001B[0m\n\u001B[1;32m     23\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39mregistry[parser_name]\n\u001B[1;32m     24\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m:\n\u001B[0;32m---> 25\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\n\u001B[1;32m     26\u001B[0m         \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mUnknown parser \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mparser_name\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m. Currently available are: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39mget_available_parsers()\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m     27\u001B[0m     )\n",
       "\u001B[0;31mValueError\u001B[0m: Unknown parser openai-123. Currently available are: ['openai-functions', 'openai-tools']"
      ]
@@ -257,12 +248,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:05.865706Z",
-     "start_time": "2024-04-16T10:51:05.835017Z"
+     "end_time": "2024-04-17T17:04:58.139649Z",
+     "start_time": "2024-04-17T17:04:57.956224Z"
     }
    },
    "id": "8215e89168ce1a32",
-   "execution_count": 26
+   "execution_count": 7
   },
   {
    "cell_type": "markdown",
@@ -301,12 +292,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:06.508930Z",
-     "start_time": "2024-04-16T10:51:06.481486Z"
+     "end_time": "2024-04-17T17:04:59.517048Z",
+     "start_time": "2024-04-17T17:04:59.485210Z"
     }
    },
    "id": "8b4fb78057f1c638",
-   "execution_count": 27
+   "execution_count": 8
   },
   {
    "cell_type": "code",
@@ -320,12 +311,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:06.751563Z",
-     "start_time": "2024-04-16T10:51:06.725555Z"
+     "end_time": "2024-04-17T17:04:59.854415Z",
+     "start_time": "2024-04-17T17:04:59.825966Z"
     }
    },
    "id": "ce217e236c1ac466",
-   "execution_count": 28
+   "execution_count": 9
   },
   {
    "cell_type": "code",
@@ -337,12 +328,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:06.984998Z",
-     "start_time": "2024-04-16T10:51:06.969917Z"
+     "end_time": "2024-04-17T17:05:00.173342Z",
+     "start_time": "2024-04-17T17:05:00.152001Z"
     }
    },
    "id": "a3dc51120ddb467b",
-   "execution_count": 29
+   "execution_count": 10
   },
   {
    "cell_type": "code",
@@ -362,12 +353,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:07.220919Z",
-     "start_time": "2024-04-16T10:51:07.195773Z"
+     "end_time": "2024-04-17T17:05:00.520833Z",
+     "start_time": "2024-04-17T17:05:00.488562Z"
     }
    },
    "id": "34acfc8b28bbaf2f",
-   "execution_count": 30
+   "execution_count": 11
   },
   {
    "cell_type": "markdown",
@@ -422,12 +413,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:08.286803Z",
-     "start_time": "2024-04-16T10:51:08.257212Z"
+     "end_time": "2024-04-17T17:05:01.991170Z",
+     "start_time": "2024-04-17T17:05:01.962160Z"
     }
    },
    "id": "608d21e1c040ba1d",
-   "execution_count": 31
+   "execution_count": 12
   },
   {
    "cell_type": "code",
@@ -446,12 +437,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:08.539930Z",
-     "start_time": "2024-04-16T10:51:08.512890Z"
+     "end_time": "2024-04-17T17:05:02.468383Z",
+     "start_time": "2024-04-17T17:05:02.434490Z"
     }
    },
    "id": "b20c88b95bc90932",
-   "execution_count": 32
+   "execution_count": 13
   },
   {
    "cell_type": "code",
@@ -470,12 +461,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:08.929586Z",
-     "start_time": "2024-04-16T10:51:08.910605Z"
+     "end_time": "2024-04-17T17:05:02.759661Z",
+     "start_time": "2024-04-17T17:05:02.741319Z"
     }
    },
    "id": "f6e2fed4f770e360",
-   "execution_count": 33
+   "execution_count": 14
   },
   {
    "cell_type": "code",
@@ -493,21 +484,21 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:09.301497Z",
-     "start_time": "2024-04-16T10:51:09.288092Z"
+     "end_time": "2024-04-17T17:05:03.152792Z",
+     "start_time": "2024-04-17T17:05:03.133479Z"
     }
    },
    "id": "d5217c49707cefc",
-   "execution_count": 34
+   "execution_count": 15
   },
   {
    "cell_type": "code",
    "outputs": [
     {
      "data": {
-      "text/plain": "{'inputs': {'numbers': '1 1 4 6'},\n 'agent_outcome': AgentFinish(return_values={'output': \"I have explored various combinations of arithmetic operations with the given numbers, but I still couldn't reach the target of 24. It seems challenging to achieve the desired result with these particular numbers. Let's reassess and try a different approach.\"}, log=\"I have explored various combinations of arithmetic operations with the given numbers, but I still couldn't reach the target of 24. It seems challenging to achieve the desired result with these particular numbers. Let's reassess and try a different approach.\"),\n 'evaluator_should_continue': True,\n 'self_reflection_memory': ChatMessageHistory(messages=[('content', 'One possible reason for failure could be focusing too much on individual arithmetic operations without considering alternative combinations. To mitigate this, I should explore different sequences of operations earlier in the process rather than getting fixated on a single path. Additionally, quickly identifying potential opportunities for combining results to reach the target number more efficiently can enhance the overall problem-solving strategy.'), ('additional_kwargs', {}), ('type', 'ai'), ('name', None), ('id', None), ('example', False), ('content', 'One possible reason for the failure in this trial could be the lack of flexibility in exploring alternative arithmetic operations and combinations. To mitigate this, a new plan could involve systematically rotating through different operations and prioritizing interactions that provide intermediary results closer to the target number of 24. 
This approach aims to enhance adaptability and increase the chances of finding a successful solution.'), ('additional_kwargs', {}), ('type', 'ai'), ('name', None), ('id', None), ('example', False)]),\n 'self_reflections': [AIMessage(content='One possible reason for failure could be focusing too much on individual arithmetic operations without considering alternative combinations. To mitigate this, I should explore different sequences of operations earlier in the process rather than getting fixated on a single path. Additionally, quickly identifying potential opportunities for combining results to reach the target number more efficiently can enhance the overall problem-solving strategy.')],\n 'intermediate_steps': [(OpenAIToolAgentAction(tool='add', tool_input={'number1': 1, 'number2': 6}, log=\"\\nInvoking: `add` with `{'number1': 1, 'number2': 6}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_CYqNTOxHN3hKb5uOJClDiSEY', 'function': {'arguments': '{\"number1\": 1, \"number2\": 6}', 'name': 'add'}, 'type': 'function'}, {'index': 1, 'id': 'call_RUgcqowVvZsFCyoDhkA4F59w', 'function': {'arguments': '{\"number1\": 1, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_CYqNTOxHN3hKb5uOJClDiSEY'),\n   {'observation': 'result of current arithmetical operation on 1.0 and 6.0 is 7.0',\n    'reward': 0,\n    'terminated': False,\n    'truncated': False,\n    'info': {'numbers': '1.0 4.0 7.0'}}),\n  (OpenAIToolAgentAction(tool='multiply', tool_input={'number1': 1, 'number2': 4}, log=\"\\nInvoking: `multiply` with `{'number1': 1, 'number2': 4}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_CYqNTOxHN3hKb5uOJClDiSEY', 'function': {'arguments': '{\"number1\": 1, \"number2\": 6}', 'name': 'add'}, 'type': 'function'}, {'index': 1, 'id': 'call_RUgcqowVvZsFCyoDhkA4F59w', 'function': {'arguments': '{\"number1\": 1, \"number2\": 
4}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_RUgcqowVvZsFCyoDhkA4F59w'),\n   {'observation': 'result of current arithmetical operation on 1.0 and 4.0 is 4.0',\n    'reward': 0,\n    'terminated': False,\n    'truncated': False,\n    'info': {'numbers': '7.0 4.0'}}),\n  (OpenAIToolAgentAction(tool='multiply', tool_input={'number1': 7, 'number2': 4}, log=\"\\nInvoking: `multiply` with `{'number1': 7, 'number2': 4}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_qKIKNbPvMqGgkWX0svwYhvID', 'function': {'arguments': '{\"number1\": 7, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}, {'index': 1, 'id': 'call_SDBK3GiIF4mslQBPNoCNR64X', 'function': {'arguments': '{\"number1\": 1, \"number2\": 6}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_qKIKNbPvMqGgkWX0svwYhvID'),\n   {'observation': 'result of current arithmetical operation on 7.0 and 4.0 is 28.0',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  (OpenAIToolAgentAction(tool='multiply', tool_input={'number1': 1, 'number2': 6}, log=\"\\nInvoking: `multiply` with `{'number1': 1, 'number2': 6}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_qKIKNbPvMqGgkWX0svwYhvID', 'function': {'arguments': '{\"number1\": 7, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}, {'index': 1, 'id': 'call_SDBK3GiIF4mslQBPNoCNR64X', 'function': {'arguments': '{\"number1\": 1, \"number2\": 6}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_SDBK3GiIF4mslQBPNoCNR64X'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  (OpenAIToolAgentAction(tool='subtract', tool_input={'number1': 6, 'number2': 4}, log=\"\\nInvoking: `subtract` with `{'number1': 6, 'number2': 
4}`\\nresponded: I attempted various arithmetic operations with the given numbers, but I couldn't reach the target of 24. Here are the last steps I took:\\n1. \\\\(1 + 6 = 7\\\\)\\n2. \\\\(7 * 4 = 28\\\\)\\nI failed to reach 24. Let's reassess the numbers and try a different approach.\\n\\n\", message_log=[AIMessageChunk(content=\"I attempted various arithmetic operations with the given numbers, but I couldn't reach the target of 24. Here are the last steps I took:\\n1. \\\\(1 + 6 = 7\\\\)\\n2. \\\\(7 * 4 = 28\\\\)\\nI failed to reach 24. Let's reassess the numbers and try a different approach.\", additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_5U3tdD068VDtn4ySRRTpPE7a', 'function': {'arguments': '{\"number1\": 6, \"number2\": 4}', 'name': 'subtract'}, 'type': 'function'}, {'index': 1, 'id': 'call_Cw3bs0ZOaiDCxIpUMMwsAXS8', 'function': {'arguments': '{\"number1\": 1, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_5U3tdD068VDtn4ySRRTpPE7a'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  (OpenAIToolAgentAction(tool='multiply', tool_input={'number1': 1, 'number2': 4}, log=\"\\nInvoking: `multiply` with `{'number1': 1, 'number2': 4}`\\nresponded: I attempted various arithmetic operations with the given numbers, but I couldn't reach the target of 24. Here are the last steps I took:\\n1. \\\\(1 + 6 = 7\\\\)\\n2. \\\\(7 * 4 = 28\\\\)\\nI failed to reach 24. Let's reassess the numbers and try a different approach.\\n\\n\", message_log=[AIMessageChunk(content=\"I attempted various arithmetic operations with the given numbers, but I couldn't reach the target of 24. Here are the last steps I took:\\n1. \\\\(1 + 6 = 7\\\\)\\n2. \\\\(7 * 4 = 28\\\\)\\nI failed to reach 24. 
Let's reassess the numbers and try a different approach.\", additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_5U3tdD068VDtn4ySRRTpPE7a', 'function': {'arguments': '{\"number1\": 6, \"number2\": 4}', 'name': 'subtract'}, 'type': 'function'}, {'index': 1, 'id': 'call_Cw3bs0ZOaiDCxIpUMMwsAXS8', 'function': {'arguments': '{\"number1\": 1, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_Cw3bs0ZOaiDCxIpUMMwsAXS8'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  (OpenAIToolAgentAction(tool='multiply', tool_input={'number1': 6, 'number2': 4}, log=\"\\nInvoking: `multiply` with `{'number1': 6, 'number2': 4}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_ngt9FZObEls88F1PiHrIRmhl', 'function': {'arguments': '{\"number1\":6,\"number2\":4}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_ngt9FZObEls88F1PiHrIRmhl'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  (OpenAIToolAgentAction(tool='add', tool_input={'number1': 1, 'number2': 6}, log=\"\\nInvoking: `add` with `{'number1': 1, 'number2': 6}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_Lkc5ZJ2Ix2Wqu6Dqpv5s9LvV', 'function': {'arguments': '{\"number1\": 1, \"number2\": 6}', 'name': 'add'}, 'type': 'function'}, {'index': 1, 'id': 'call_iMnzX2tfVLeM5CGwPUrnOiWr', 'function': {'arguments': '{\"number1\": 4, \"number2\": 6}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_Lkc5ZJ2Ix2Wqu6Dqpv5s9LvV'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  
(OpenAIToolAgentAction(tool='multiply', tool_input={'number1': 4, 'number2': 6}, log=\"\\nInvoking: `multiply` with `{'number1': 4, 'number2': 6}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_Lkc5ZJ2Ix2Wqu6Dqpv5s9LvV', 'function': {'arguments': '{\"number1\": 1, \"number2\": 6}', 'name': 'add'}, 'type': 'function'}, {'index': 1, 'id': 'call_iMnzX2tfVLeM5CGwPUrnOiWr', 'function': {'arguments': '{\"number1\": 4, \"number2\": 6}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_iMnzX2tfVLeM5CGwPUrnOiWr'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  (OpenAIToolAgentAction(tool='multiply', tool_input={'number1': 1, 'number2': 6}, log=\"\\nInvoking: `multiply` with `{'number1': 1, 'number2': 6}`\\nresponded: I have exhausted all possible arithmetic operations with the given numbers, and unfortunately, I couldn't reach the target of 24. Here are the last steps I took:\\n1. \\\\(6 - 4 = 2\\\\)\\n2. \\\\(2 * 4 = 8\\\\)\\nThe numbers could not be combined to make 24. Let's review the numbers and try a different approach.\\n\\n\", message_log=[AIMessageChunk(content=\"I have exhausted all possible arithmetic operations with the given numbers, and unfortunately, I couldn't reach the target of 24. Here are the last steps I took:\\n1. \\\\(6 - 4 = 2\\\\)\\n2. \\\\(2 * 4 = 8\\\\)\\nThe numbers could not be combined to make 24. 
Let's review the numbers and try a different approach.\", additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_MOD7Bb2UycNzDQvB5txrLOEd', 'function': {'arguments': '{\"number1\": 1, \"number2\": 6}', 'name': 'multiply'}, 'type': 'function'}, {'index': 1, 'id': 'call_POQolTZd7bjhHJjNVbKR0LTj', 'function': {'arguments': '{\"number1\": 4, \"number2\": 1}', 'name': 'subtract'}, 'type': 'function'}]})], tool_call_id='call_MOD7Bb2UycNzDQvB5txrLOEd'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  (OpenAIToolAgentAction(tool='subtract', tool_input={'number1': 4, 'number2': 1}, log=\"\\nInvoking: `subtract` with `{'number1': 4, 'number2': 1}`\\nresponded: I have exhausted all possible arithmetic operations with the given numbers, and unfortunately, I couldn't reach the target of 24. Here are the last steps I took:\\n1. \\\\(6 - 4 = 2\\\\)\\n2. \\\\(2 * 4 = 8\\\\)\\nThe numbers could not be combined to make 24. Let's review the numbers and try a different approach.\\n\\n\", message_log=[AIMessageChunk(content=\"I have exhausted all possible arithmetic operations with the given numbers, and unfortunately, I couldn't reach the target of 24. Here are the last steps I took:\\n1. \\\\(6 - 4 = 2\\\\)\\n2. \\\\(2 * 4 = 8\\\\)\\nThe numbers could not be combined to make 24. 
Let's review the numbers and try a different approach.\", additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_MOD7Bb2UycNzDQvB5txrLOEd', 'function': {'arguments': '{\"number1\": 1, \"number2\": 6}', 'name': 'multiply'}, 'type': 'function'}, {'index': 1, 'id': 'call_POQolTZd7bjhHJjNVbKR0LTj', 'function': {'arguments': '{\"number1\": 4, \"number2\": 1}', 'name': 'subtract'}, 'type': 'function'}]})], tool_call_id='call_POQolTZd7bjhHJjNVbKR0LTj'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  (OpenAIToolAgentAction(tool='multiply', tool_input={'number1': 6, 'number2': 4}, log=\"\\nInvoking: `multiply` with `{'number1': 6, 'number2': 4}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_yYTJC8CnIjQM59hCd0FRq7EX', 'function': {'arguments': '{\"number1\": 6, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}, {'index': 1, 'id': 'call_44tRTXufkvN7SAngMOKpoHcY', 'function': {'arguments': '{\"number1\": 1, \"number2\": 4}', 'name': 'subtract'}, 'type': 'function'}]})], tool_call_id='call_yYTJC8CnIjQM59hCd0FRq7EX'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  (OpenAIToolAgentAction(tool='subtract', tool_input={'number1': 1, 'number2': 4}, log=\"\\nInvoking: `subtract` with `{'number1': 1, 'number2': 4}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_yYTJC8CnIjQM59hCd0FRq7EX', 'function': {'arguments': '{\"number1\": 6, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}, {'index': 1, 'id': 'call_44tRTXufkvN7SAngMOKpoHcY', 'function': {'arguments': '{\"number1\": 1, \"number2\": 4}', 'name': 'subtract'}, 'type': 'function'}]})], tool_call_id='call_44tRTXufkvN7SAngMOKpoHcY'),\n  
 {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  (OpenAIToolAgentAction(tool='add', tool_input={'number1': 1, 'number2': 4}, log=\"\\nInvoking: `add` with `{'number1': 1, 'number2': 4}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_oGMxfgiGRu9uR1agM84i6ct9', 'function': {'arguments': '{\"number1\": 1, \"number2\": 4}', 'name': 'add'}, 'type': 'function'}, {'index': 1, 'id': 'call_kj7hgo0p8zqyQwWrKHTEABBk', 'function': {'arguments': '{\"number1\": 6, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_oGMxfgiGRu9uR1agM84i6ct9'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}}),\n  (OpenAIToolAgentAction(tool='multiply', tool_input={'number1': 6, 'number2': 4}, log=\"\\nInvoking: `multiply` with `{'number1': 6, 'number2': 4}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_oGMxfgiGRu9uR1agM84i6ct9', 'function': {'arguments': '{\"number1\": 1, \"number2\": 4}', 'name': 'add'}, 'type': 'function'}, {'index': 1, 'id': 'call_kj7hgo0p8zqyQwWrKHTEABBk', 'function': {'arguments': '{\"number1\": 6, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_kj7hgo0p8zqyQwWrKHTEABBk'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '28.0'}})],\n 'iteration': 2}"
+      "text/plain": "{'inputs': {'numbers': '1 1 4 6'},\n 'agent_outcome': AgentFinish(return_values={'output': \"I have completed the calculations, but unfortunately, I couldn't reach the target number 24.\\n\\nFinal Result:\\n- Addition: 7 + 4 = 11\\n\\nRemaining number: 11\\n\\nI will reflect on this attempt and strategize for the next one to improve the chances of reaching the goal.\\nDiagnosis:\\n- In this attempt, I failed to reach the target number 24 by focusing on addition and multiplication only, without considering other operations like subtraction or division.\\n- I also didn't explore all possible combinations of the numbers, limiting the potential solutions.\\n\\nNew Plan:\\n- For the next attempt, I will incorporate subtraction and division operations to broaden the scope of possible solutions.\\n- I will systematically explore various permutations and combinations of the numbers to increase the chances of success.\\n- By maintaining a balanced approach to arithmetic operations, I aim to find the correct expression to reach the target number.\"}, log=\"I have completed the calculations, but unfortunately, I couldn't reach the target number 24.\\n\\nFinal Result:\\n- Addition: 7 + 4 = 11\\n\\nRemaining number: 11\\n\\nI will reflect on this attempt and strategize for the next one to improve the chances of reaching the goal.\\nDiagnosis:\\n- In this attempt, I failed to reach the target number 24 by focusing on addition and multiplication only, without considering other operations like subtraction or division.\\n- I also didn't explore all possible combinations of the numbers, limiting the potential solutions.\\n\\nNew Plan:\\n- For the next attempt, I will incorporate subtraction and division operations to broaden the scope of possible solutions.\\n- I will systematically explore various permutations and combinations of the numbers to increase the chances of success.\\n- By maintaining a balanced approach to arithmetic operations, I aim to find the 
correct expression to reach the target number.\"),\n 'evaluator_should_continue': True,\n 'self_reflection_memory': ChatMessageHistory(messages=[('content', 'Diagnosis: In this trial, I struggled to find a solution for obtaining 24 using the numbers 1, 1, 4, and 6, mainly because I may have been fixated on specific arithmetic operations and order of operations, limiting my exploration of alternative combinations.\\n\\nNew Plan: To mitigate this, I will adopt a more flexible approach by considering various permutations of the numbers and exploring different combinations of arithmetic operations without getting stuck on a single pattern. I will also prioritize breaking down the problem into simpler steps and maintaining a systematic method of trial and error to increase the likelihood of finding the correct solution.'), ('additional_kwargs', {}), ('type', 'ai'), ('name', None), ('id', None), ('example', False), ('content', 'Diagnosis: The failure in this trial may be attributed to a narrow focus on specific arithmetic operations, neglecting the potential of alternative approaches for reaching the target number. 
\\n\\nNew Plan: To mitigate this shortcoming, I will diversify the exploration of arithmetic operations and prioritize flexible thinking by considering all possible combinations of numbers and operations, ensuring a comprehensive approach towards achieving the target number.'), ('additional_kwargs', {}), ('type', 'ai'), ('name', None), ('id', None), ('example', False)]),\n 'self_reflections': [AIMessage(content='Diagnosis: In this trial, I struggled to find a solution for obtaining 24 using the numbers 1, 1, 4, and 6, mainly because I may have been fixated on specific arithmetic operations and order of operations, limiting my exploration of alternative combinations.\\n\\nNew Plan: To mitigate this, I will adopt a more flexible approach by considering various permutations of the numbers and exploring different combinations of arithmetic operations without getting stuck on a single pattern. I will also prioritize breaking down the problem into simpler steps and maintaining a systematic method of trial and error to increase the likelihood of finding the correct solution.')],\n 'intermediate_steps': [(OpenAIToolAgentAction(tool='add', tool_input={'number1': 1, 'number2': 6}, log=\"\\nInvoking: `add` with `{'number1': 1, 'number2': 6}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_X0KNNWAvf3vWOl3mH8JRUu3C', 'function': {'arguments': '{\"number1\": 1, \"number2\": 6}', 'name': 'add'}, 'type': 'function'}, {'index': 1, 'id': 'call_nThdtz95kZJMRqgJTpXN6mBX', 'function': {'arguments': '{\"number1\": 1, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_X0KNNWAvf3vWOl3mH8JRUu3C'),\n   {'observation': 'result of current arithmetical operation on 1.0 and 6.0 is 7.0',\n    'reward': 0,\n    'terminated': False,\n    'truncated': False,\n    'info': {'numbers': '1.0 4.0 7.0'}}),\n  (OpenAIToolAgentAction(tool='multiply', tool_input={'number1': 1, 'number2': 4}, log=\"\\nInvoking: 
`multiply` with `{'number1': 1, 'number2': 4}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_X0KNNWAvf3vWOl3mH8JRUu3C', 'function': {'arguments': '{\"number1\": 1, \"number2\": 6}', 'name': 'add'}, 'type': 'function'}, {'index': 1, 'id': 'call_nThdtz95kZJMRqgJTpXN6mBX', 'function': {'arguments': '{\"number1\": 1, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_nThdtz95kZJMRqgJTpXN6mBX'),\n   {'observation': 'result of current arithmetical operation on 1.0 and 4.0 is 4.0',\n    'reward': 0,\n    'terminated': False,\n    'truncated': False,\n    'info': {'numbers': '7.0 4.0'}}),\n  (OpenAIToolAgentAction(tool='add', tool_input={'number1': 7, 'number2': 4}, log=\"\\nInvoking: `add` with `{'number1': 7, 'number2': 4}`\\nresponded: I have made progress with the calculations:\\n\\n1. Addition: 1 + 6 = 7\\n2. Multiplication: 1 * 4 = 4\\n\\nRemaining numbers: 7, 4\\n\\nI will continue with the next steps.\\n\\n\", message_log=[AIMessageChunk(content='I have made progress with the calculations:\\n\\n1. Addition: 1 + 6 = 7\\n2. 
Multiplication: 1 * 4 = 4\\n\\nRemaining numbers: 7, 4\\n\\nI will continue with the next steps.', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_EWvqc0F5dyMVYei7mQUaaokp', 'function': {'arguments': '{\"number1\": 7, \"number2\": 4}', 'name': 'add'}, 'type': 'function'}, {'index': 1, 'id': 'call_TX4s8ZbvXPSFWNvdoGh4yNvb', 'function': {'arguments': '{\"number1\": 7, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_EWvqc0F5dyMVYei7mQUaaokp'),\n   {'observation': 'result of current arithmetical operation on 7.0 and 4.0 is 11.0',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '11.0'}}),\n  (OpenAIToolAgentAction(tool='multiply', tool_input={'number1': 7, 'number2': 4}, log=\"\\nInvoking: `multiply` with `{'number1': 7, 'number2': 4}`\\nresponded: I have made progress with the calculations:\\n\\n1. Addition: 1 + 6 = 7\\n2. Multiplication: 1 * 4 = 4\\n\\nRemaining numbers: 7, 4\\n\\nI will continue with the next steps.\\n\\n\", message_log=[AIMessageChunk(content='I have made progress with the calculations:\\n\\n1. Addition: 1 + 6 = 7\\n2. Multiplication: 1 * 4 = 4\\n\\nRemaining numbers: 7, 4\\n\\nI will continue with the next steps.', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_EWvqc0F5dyMVYei7mQUaaokp', 'function': {'arguments': '{\"number1\": 7, \"number2\": 4}', 'name': 'add'}, 'type': 'function'}, {'index': 1, 'id': 'call_TX4s8ZbvXPSFWNvdoGh4yNvb', 'function': {'arguments': '{\"number1\": 7, \"number2\": 4}', 'name': 'multiply'}, 'type': 'function'}]})], tool_call_id='call_TX4s8ZbvXPSFWNvdoGh4yNvb'),\n   {'observation': 'The environment has already been terminated.',\n    'reward': 0,\n    'terminated': True,\n    'truncated': False,\n    'info': {'numbers': '11.0'}})],\n 'iteration': 2}"
      },
-     "execution_count": 35,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -522,12 +513,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T10:51:31.724747Z",
-     "start_time": "2024-04-16T10:51:09.533537Z"
+     "end_time": "2024-04-17T17:05:54.683068Z",
+     "start_time": "2024-04-17T17:05:03.956702Z"
     }
    },
    "id": "415a8d21878f332d",
-   "execution_count": 35
+   "execution_count": 16
   }
  ],
  "metadata": {
diff --git a/environments/game_of_24/tot_dfs.ipynb b/environments/game_of_24/tot_dfs.ipynb
index df96b5f..fb27c99 100644
--- a/environments/game_of_24/tot_dfs.ipynb
+++ b/environments/game_of_24/tot_dfs.ipynb
@@ -48,8 +48,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T16:40:41.611007Z",
-     "start_time": "2024-04-16T16:40:40.556602Z"
+     "end_time": "2024-04-17T17:01:49.125155Z",
+     "start_time": "2024-04-17T17:01:48.094919Z"
     }
    },
    "id": "c94d0bfae6ac52b7"
@@ -76,8 +76,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T16:40:41.622101Z",
-     "start_time": "2024-04-16T16:40:41.611413Z"
+     "end_time": "2024-04-17T17:01:49.135527Z",
+     "start_time": "2024-04-17T17:01:49.125568Z"
     }
    },
    "id": "7d8838345955d3c2"
@@ -101,8 +101,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T16:40:41.632229Z",
-     "start_time": "2024-04-16T16:40:41.622289Z"
+     "end_time": "2024-04-17T17:01:49.146214Z",
+     "start_time": "2024-04-17T17:01:49.135746Z"
     }
    },
    "id": "d3c7e376d173b5b6",
@@ -145,8 +145,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T16:40:41.642600Z",
-     "start_time": "2024-04-16T16:40:41.632567Z"
+     "end_time": "2024-04-17T17:01:49.162016Z",
+     "start_time": "2024-04-17T17:01:49.146423Z"
     }
    },
    "id": "3b1ff5faf3a98a79"
@@ -185,7 +185,7 @@
    "outputs": [],
    "source": [
     "thought_evaluator = ThoughtEvaluator.create(llm=ChatOpenAI(model=model_name, temperature=temperature), \n",
-    "                                      user_message=dedent(\"\"\"\"\n",
+    "                                      user_message=dedent(\"\"\"\n",
     "                                      Given an input and an answer, give a judgement if the answer is correct, i.e. \n",
     "                                      1) it uses each given number exactly once; \n",
     "                                      2) it doesn't use any other number; \n",
@@ -201,8 +201,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T16:40:41.671147Z",
-     "start_time": "2024-04-16T16:40:41.643489Z"
+     "end_time": "2024-04-17T17:01:49.348388Z",
+     "start_time": "2024-04-17T17:01:49.323182Z"
     }
    },
    "id": "388cbde0cb39f2b0",
@@ -245,7 +245,7 @@
     "                                            tools=env.tools, \n",
     "                                            parser_name=\"openai-tools\", \n",
     "            user_message=dedent(\"\"\"\n",
-    "            \"You are given four numbers, and your end goal is to obtain 24 from given numbers via basic arithmetic operations. \n",
+    "            You are given four numbers, and your end goal is to obtain 24 from given numbers via basic arithmetic operations. \n",
     "\n",
     "            Let's play Game of 24 in a step-by-step fashion: use only one of available tools to suggest a possible next step from the current state. Please, make sure to suggest exactly ONE (1) tool call, no more and no less. \n",
     "            \n",
@@ -258,12 +258,12 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T16:41:37.616937Z",
-     "start_time": "2024-04-16T16:41:37.590459Z"
+     "end_time": "2024-04-17T17:01:50.123426Z",
+     "start_time": "2024-04-17T17:01:50.021810Z"
     }
    },
    "id": "d48ca4a0c5d0fe71",
-   "execution_count": 10
+   "execution_count": 6
   },
   {
    "cell_type": "markdown",
@@ -280,9 +280,9 @@
    "outputs": [
     {
      "data": {
-      "text/plain": "<planning_library.strategies.tot_dfs.components.thought_generator.ThoughtGenerator at 0x176da1cd0>"
+      "text/plain": "<planning_library.strategies.tot_dfs.components.thought_generator.ThoughtGenerator at 0x1357d05d0>"
      },
-     "execution_count": 11,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -293,16 +293,16 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T16:41:38.225878Z",
-     "start_time": "2024-04-16T16:41:38.211755Z"
+     "end_time": "2024-04-17T17:01:50.726701Z",
+     "start_time": "2024-04-17T17:01:50.713306Z"
     }
    },
    "id": "22eed8130208d0ad",
-   "execution_count": 11
+   "execution_count": 7
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 8,
    "outputs": [
     {
      "name": "stdout",
@@ -314,42 +314,17 @@
      ]
     },
     {
-     "ename": "NotImplementedError",
-     "evalue": "Unsupported message type: <class 'list'>",
+     "ename": "IndexError",
+     "evalue": "list index out of range",
      "output_type": "error",
      "traceback": [
       "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
-      "\u001B[0;31mNotImplementedError\u001B[0m                       Traceback (most recent call last)",
-      "Cell \u001B[0;32mIn[12], line 17\u001B[0m\n\u001B[1;32m      7\u001B[0m strategy_executor \u001B[38;5;241m=\u001B[39m TreeOfThoughtsDFSStrategy(\n\u001B[1;32m      8\u001B[0m     tools\u001B[38;5;241m=\u001B[39menv\u001B[38;5;241m.\u001B[39mtools,\n\u001B[1;32m      9\u001B[0m     action_executor\u001B[38;5;241m=\u001B[39maction_executor,\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m     14\u001B[0m     do_sorting\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m,\n\u001B[1;32m     15\u001B[0m )\n\u001B[1;32m     16\u001B[0m env\u001B[38;5;241m.\u001B[39mreset(options\u001B[38;5;241m=\u001B[39m{\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mnumbers\u001B[39m\u001B[38;5;124m\"\u001B[39m: [\u001B[38;5;241m1\u001B[39m, \u001B[38;5;241m1\u001B[39m, \u001B[38;5;241m4\u001B[39m, \u001B[38;5;241m6\u001B[39m]})\n\u001B[0;32m---> 17\u001B[0m \u001B[43mstrategy_executor\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43minvoke\u001B[49m\u001B[43m(\u001B[49m\u001B[43m{\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mnumbers\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m:\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43m1 1 4 6\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m}\u001B[49m\u001B[43m)\u001B[49m\n",
+      "\u001B[0;31mIndexError\u001B[0m                                Traceback (most recent call last)",
+      "Cell \u001B[0;32mIn[8], line 17\u001B[0m\n\u001B[1;32m      7\u001B[0m strategy_executor \u001B[38;5;241m=\u001B[39m TreeOfThoughtsDFSStrategy(\n\u001B[1;32m      8\u001B[0m     tools\u001B[38;5;241m=\u001B[39menv\u001B[38;5;241m.\u001B[39mtools,\n\u001B[1;32m      9\u001B[0m     action_executor\u001B[38;5;241m=\u001B[39maction_executor,\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m     14\u001B[0m     do_sorting\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m,\n\u001B[1;32m     15\u001B[0m )\n\u001B[1;32m     16\u001B[0m env\u001B[38;5;241m.\u001B[39mreset(options\u001B[38;5;241m=\u001B[39m{\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mnumbers\u001B[39m\u001B[38;5;124m\"\u001B[39m: [\u001B[38;5;241m1\u001B[39m, \u001B[38;5;241m1\u001B[39m, \u001B[38;5;241m4\u001B[39m, \u001B[38;5;241m6\u001B[39m]})\n\u001B[0;32m---> 17\u001B[0m \u001B[43mstrategy_executor\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43minvoke\u001B[49m\u001B[43m(\u001B[49m\u001B[43m{\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mnumbers\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m:\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43m1 1 4 6\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m}\u001B[49m\u001B[43m)\u001B[49m\n",
       "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain/chains/base.py:163\u001B[0m, in \u001B[0;36mChain.invoke\u001B[0;34m(self, input, config, **kwargs)\u001B[0m\n\u001B[1;32m    161\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mBaseException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[1;32m    162\u001B[0m     run_manager\u001B[38;5;241m.\u001B[39mon_chain_error(e)\n\u001B[0;32m--> 163\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m e\n\u001B[1;32m    164\u001B[0m run_manager\u001B[38;5;241m.\u001B[39mon_chain_end(outputs)\n\u001B[1;32m    166\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m include_run_info:\n",
       "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain/chains/base.py:153\u001B[0m, in \u001B[0;36mChain.invoke\u001B[0;34m(self, input, config, **kwargs)\u001B[0m\n\u001B[1;32m    150\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m    151\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_validate_inputs(inputs)\n\u001B[1;32m    152\u001B[0m     outputs \u001B[38;5;241m=\u001B[39m (\n\u001B[0;32m--> 153\u001B[0m         \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_call\u001B[49m\u001B[43m(\u001B[49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mrun_manager\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrun_manager\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    154\u001B[0m         \u001B[38;5;28;01mif\u001B[39;00m new_arg_supported\n\u001B[1;32m    155\u001B[0m         \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_call(inputs)\n\u001B[1;32m    156\u001B[0m     )\n\u001B[1;32m    158\u001B[0m     final_outputs: Dict[\u001B[38;5;28mstr\u001B[39m, Any] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mprep_outputs(\n\u001B[1;32m    159\u001B[0m         inputs, outputs, return_only_outputs\n\u001B[1;32m    160\u001B[0m     )\n\u001B[1;32m    161\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mBaseException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n",
-      "File \u001B[0;32m~/PycharmProjects/planning-library/planning_library/strategies/base_strategy.py:126\u001B[0m, in \u001B[0;36mBaseCustomStrategy._call\u001B[0;34m(self, inputs, run_manager)\u001B[0m\n\u001B[1;32m    123\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"Run text through and get agent response.\"\"\"\u001B[39;00m\n\u001B[1;32m    124\u001B[0m name_to_tool_map, color_mapping \u001B[38;5;241m=\u001B[39m get_tools_maps(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtools)\n\u001B[0;32m--> 126\u001B[0m outputs \u001B[38;5;241m=\u001B[39m \u001B[43m[\u001B[49m\n\u001B[1;32m    127\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_return\u001B[49m\u001B[43m(\u001B[49m\u001B[43moutput\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mintermediate_steps\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mrun_manager\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrun_manager\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    128\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43;01mfor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43moutput\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mintermediate_steps\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01min\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_run_strategy\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m    129\u001B[0m \u001B[43m        \u001B[49m\u001B[43mname_to_tool_map\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mname_to_tool_map\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    130\u001B[0m \u001B[43m        \u001B[49m\u001B[43mcolor_mapping\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcolor_mapping\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    131\u001B[0m \u001B[43m        
\u001B[49m\u001B[43minputs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    132\u001B[0m \u001B[43m        \u001B[49m\u001B[43mrun_manager\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrun_manager\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    133\u001B[0m \u001B[43m    \u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    134\u001B[0m \u001B[43m\u001B[49m\u001B[43m]\u001B[49m\n\u001B[1;32m    136\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m {key: [output[key] \u001B[38;5;28;01mfor\u001B[39;00m output \u001B[38;5;129;01min\u001B[39;00m outputs] \u001B[38;5;28;01mfor\u001B[39;00m key \u001B[38;5;129;01min\u001B[39;00m outputs[\u001B[38;5;241m0\u001B[39m]}\n",
-      "File \u001B[0;32m~/PycharmProjects/planning-library/planning_library/strategies/base_strategy.py:126\u001B[0m, in \u001B[0;36m<listcomp>\u001B[0;34m(.0)\u001B[0m\n\u001B[1;32m    123\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"Run text through and get agent response.\"\"\"\u001B[39;00m\n\u001B[1;32m    124\u001B[0m name_to_tool_map, color_mapping \u001B[38;5;241m=\u001B[39m get_tools_maps(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtools)\n\u001B[0;32m--> 126\u001B[0m outputs \u001B[38;5;241m=\u001B[39m \u001B[43m[\u001B[49m\n\u001B[1;32m    127\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_return\u001B[49m\u001B[43m(\u001B[49m\u001B[43moutput\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mintermediate_steps\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mrun_manager\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrun_manager\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    128\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43;01mfor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43moutput\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mintermediate_steps\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01min\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_run_strategy\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m    129\u001B[0m \u001B[43m        \u001B[49m\u001B[43mname_to_tool_map\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mname_to_tool_map\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    130\u001B[0m \u001B[43m        \u001B[49m\u001B[43mcolor_mapping\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcolor_mapping\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    131\u001B[0m \u001B[43m        \u001B[49m\u001B[43minputs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    132\u001B[0m \u001B[43m   
     \u001B[49m\u001B[43mrun_manager\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrun_manager\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    133\u001B[0m \u001B[43m    \u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    134\u001B[0m \u001B[43m\u001B[49m\u001B[43m]\u001B[49m\n\u001B[1;32m    136\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m {key: [output[key] \u001B[38;5;28;01mfor\u001B[39;00m output \u001B[38;5;129;01min\u001B[39;00m outputs] \u001B[38;5;28;01mfor\u001B[39;00m key \u001B[38;5;129;01min\u001B[39;00m outputs[\u001B[38;5;241m0\u001B[39m]}\n",
-      "File \u001B[0;32m~/PycharmProjects/planning-library/planning_library/strategies/tot_dfs/tot_strategy.py:215\u001B[0m, in \u001B[0;36mTreeOfThoughtsDFSStrategy._run_strategy\u001B[0;34m(self, inputs, name_to_tool_map, color_mapping, run_manager)\u001B[0m\n\u001B[1;32m    212\u001B[0m \u001B[38;5;66;03m# TODO: traverses from the tree root to the cur_node on each call. how to optimize?\u001B[39;00m\n\u001B[1;32m    213\u001B[0m trajectory \u001B[38;5;241m=\u001B[39m cur_node\u001B[38;5;241m.\u001B[39mtrajectory\n\u001B[0;32m--> 215\u001B[0m \u001B[43m\u001B[49m\u001B[38;5;28;43;01mfor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mnew_thought\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01min\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_dfs_step\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m    216\u001B[0m \u001B[43m    \u001B[49m\u001B[43minputs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    217\u001B[0m \u001B[43m    \u001B[49m\u001B[43mtrajectory\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtrajectory\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    218\u001B[0m \u001B[43m    \u001B[49m\u001B[43mrun_manager\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrun_manager\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    219\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\u001B[43m:\u001B[49m\n\u001B[1;32m    220\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;66;43;03m# actually do action(s)\u001B[39;49;00m\n\u001B[1;32m    221\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43;01mif\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43misinstance\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mnew_thought\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mAgentFinish\u001B[49m\u001B[43m)\u001B[49m\u001B[43m:\u001B[49m\n\u001B[1;32m    222\u001B[0m \u001B[43m        \u001B[49m\u001B[43mobservation\u001B[49m\u001B[43m 
\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mNone\u001B[39;49;00m\n",
-      "File \u001B[0;32m~/PycharmProjects/planning-library/planning_library/strategies/tot_dfs/tot_strategy.py:145\u001B[0m, in \u001B[0;36mTreeOfThoughtsDFSStrategy._dfs_step\u001B[0;34m(self, inputs, trajectory, run_manager)\u001B[0m\n\u001B[1;32m    130\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"Performs a single step of DFS algorithm.\u001B[39;00m\n\u001B[1;32m    131\u001B[0m \n\u001B[1;32m    132\u001B[0m \u001B[38;5;124;03mArgs:\u001B[39;00m\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m    141\u001B[0m \u001B[38;5;124;03m      * AgentFinish - for finishing thoughts / thoughts without tool calls\u001B[39;00m\n\u001B[1;32m    142\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m    144\u001B[0m \u001B[38;5;66;03m# 1: generate k possible next steps\u001B[39;00m\n\u001B[0;32m--> 145\u001B[0m thoughts \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mthought_generator\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43minvoke\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m    146\u001B[0m \u001B[43m    \u001B[49m\u001B[43mThoughtGeneratorInput\u001B[49m\u001B[43m(\u001B[49m\u001B[43minputs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mintermediate_steps\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtrajectory\u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    147\u001B[0m \u001B[43m    \u001B[49m\u001B[43mrun_manager\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrun_manager\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_child\u001B[49m\u001B[43m(\u001B[49m\u001B[43mtag\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mgenerate_thoughts\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[1;32m    148\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43;01mif\u001B[39;49;00m\u001B[43m 
\u001B[49m\u001B[43mrun_manager\u001B[49m\n\u001B[1;32m    149\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43;01melse\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mNone\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[1;32m    150\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    152\u001B[0m \u001B[38;5;66;03m# 2: (optional) sort them\u001B[39;00m\n\u001B[1;32m    153\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdo_sorting:\n",
-      "File \u001B[0;32m~/PycharmProjects/planning-library/planning_library/strategies/tot_dfs/components/thought_generator.py:100\u001B[0m, in \u001B[0;36mThoughtGenerator.invoke\u001B[0;34m(self, inputs, run_manager)\u001B[0m\n\u001B[1;32m     98\u001B[0m results: List[List[AgentAction] \u001B[38;5;241m|\u001B[39m AgentAction \u001B[38;5;241m|\u001B[39m AgentFinish] \u001B[38;5;241m=\u001B[39m []\n\u001B[1;32m     99\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m _ \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mrange\u001B[39m(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmax_num_thoughts):\n\u001B[0;32m--> 100\u001B[0m     cur_result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43magent\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43minvoke\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m    101\u001B[0m \u001B[43m        \u001B[49m\u001B[43m{\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mprevious_thoughts\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m:\u001B[49m\u001B[43m \u001B[49m\u001B[43mresults\u001B[49m\u001B[43m}\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    102\u001B[0m \u001B[43m        \u001B[49m\u001B[43mrun_manager\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrun_manager\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    103\u001B[0m \u001B[43m    \u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    104\u001B[0m     \u001B[38;5;66;03m# TODO: how to fix mypy warning properly here?\u001B[39;00m\n\u001B[1;32m    105\u001B[0m     results\u001B[38;5;241m.\u001B[39mappend(cur_result)  \u001B[38;5;66;03m# type: ignore[arg-type]\u001B[39;00m\n",
-      "File \u001B[0;32m~/PycharmProjects/planning-library/planning_library/components/agent_component.py:105\u001B[0m, in \u001B[0;36mAgentComponent.invoke\u001B[0;34m(self, inputs, run_manager)\u001B[0m\n\u001B[1;32m    100\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21minvoke\u001B[39m(\n\u001B[1;32m    101\u001B[0m     \u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m    102\u001B[0m     inputs: InputType,\n\u001B[1;32m    103\u001B[0m     run_manager: Optional[CallbackManager] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m    104\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m Union[List[AgentAction], AgentAction, AgentFinish]:\n\u001B[0;32m--> 105\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43magent\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mplan\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcallbacks\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrun_manager\u001B[49m\u001B[43m)\u001B[49m\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain/agents/agent.py:486\u001B[0m, in \u001B[0;36mRunnableMultiActionAgent.plan\u001B[0;34m(self, intermediate_steps, callbacks, **kwargs)\u001B[0m\n\u001B[1;32m    480\u001B[0m \u001B[38;5;66;03m# Use streaming to make sure that the underlying LLM is invoked in a streaming\u001B[39;00m\n\u001B[1;32m    481\u001B[0m \u001B[38;5;66;03m# fashion to make it possible to get access to the individual LLM tokens\u001B[39;00m\n\u001B[1;32m    482\u001B[0m \u001B[38;5;66;03m# when using stream_log with the Agent Executor.\u001B[39;00m\n\u001B[1;32m    483\u001B[0m \u001B[38;5;66;03m# Because the response from the plan is not a generator, we need to\u001B[39;00m\n\u001B[1;32m    484\u001B[0m \u001B[38;5;66;03m# accumulate the output into final output and return that.\u001B[39;00m\n\u001B[1;32m    485\u001B[0m final_output: Any \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[0;32m--> 486\u001B[0m \u001B[43m\u001B[49m\u001B[38;5;28;43;01mfor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mchunk\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01min\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrunnable\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mstream\u001B[49m\u001B[43m(\u001B[49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mconfig\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m{\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mcallbacks\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m:\u001B[49m\u001B[43m \u001B[49m\u001B[43mcallbacks\u001B[49m\u001B[43m}\u001B[49m\u001B[43m)\u001B[49m\u001B[43m:\u001B[49m\n\u001B[1;32m    487\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43;01mif\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mfinal_output\u001B[49m\u001B[43m 
\u001B[49m\u001B[38;5;129;43;01mis\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mNone\u001B[39;49;00m\u001B[43m:\u001B[49m\n\u001B[1;32m    488\u001B[0m \u001B[43m        \u001B[49m\u001B[43mfinal_output\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m \u001B[49m\u001B[43mchunk\u001B[49m\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/runnables/base.py:2446\u001B[0m, in \u001B[0;36mRunnableSequence.stream\u001B[0;34m(self, input, config, **kwargs)\u001B[0m\n\u001B[1;32m   2440\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mstream\u001B[39m(\n\u001B[1;32m   2441\u001B[0m     \u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m   2442\u001B[0m     \u001B[38;5;28minput\u001B[39m: Input,\n\u001B[1;32m   2443\u001B[0m     config: Optional[RunnableConfig] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m   2444\u001B[0m     \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs: Optional[Any],\n\u001B[1;32m   2445\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m Iterator[Output]:\n\u001B[0;32m-> 2446\u001B[0m     \u001B[38;5;28;01myield from\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtransform(\u001B[38;5;28miter\u001B[39m([\u001B[38;5;28minput\u001B[39m]), config, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/runnables/base.py:2433\u001B[0m, in \u001B[0;36mRunnableSequence.transform\u001B[0;34m(self, input, config, **kwargs)\u001B[0m\n\u001B[1;32m   2427\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mtransform\u001B[39m(\n\u001B[1;32m   2428\u001B[0m     \u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m   2429\u001B[0m     \u001B[38;5;28minput\u001B[39m: Iterator[Input],\n\u001B[1;32m   2430\u001B[0m     config: Optional[RunnableConfig] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m   2431\u001B[0m     \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs: Optional[Any],\n\u001B[1;32m   2432\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m Iterator[Output]:\n\u001B[0;32m-> 2433\u001B[0m     \u001B[38;5;28;01myield from\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_transform_stream_with_config(\n\u001B[1;32m   2434\u001B[0m         \u001B[38;5;28minput\u001B[39m,\n\u001B[1;32m   2435\u001B[0m         \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_transform,\n\u001B[1;32m   2436\u001B[0m         patch_config(config, run_name\u001B[38;5;241m=\u001B[39m(config \u001B[38;5;129;01mor\u001B[39;00m {})\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mrun_name\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mname),\n\u001B[1;32m   2437\u001B[0m         \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs,\n\u001B[1;32m   2438\u001B[0m     )\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/runnables/base.py:1513\u001B[0m, in \u001B[0;36mRunnable._transform_stream_with_config\u001B[0;34m(self, input, transformer, config, run_type, **kwargs)\u001B[0m\n\u001B[1;32m   1511\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m   1512\u001B[0m     \u001B[38;5;28;01mwhile\u001B[39;00m \u001B[38;5;28;01mTrue\u001B[39;00m:\n\u001B[0;32m-> 1513\u001B[0m         chunk: Output \u001B[38;5;241m=\u001B[39m context\u001B[38;5;241m.\u001B[39mrun(\u001B[38;5;28mnext\u001B[39m, iterator)  \u001B[38;5;66;03m# type: ignore\u001B[39;00m\n\u001B[1;32m   1514\u001B[0m         \u001B[38;5;28;01myield\u001B[39;00m chunk\n\u001B[1;32m   1515\u001B[0m         \u001B[38;5;28;01mif\u001B[39;00m final_output_supported:\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/runnables/base.py:2397\u001B[0m, in \u001B[0;36mRunnableSequence._transform\u001B[0;34m(self, input, run_manager, config)\u001B[0m\n\u001B[1;32m   2388\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m step \u001B[38;5;129;01min\u001B[39;00m steps:\n\u001B[1;32m   2389\u001B[0m     final_pipeline \u001B[38;5;241m=\u001B[39m step\u001B[38;5;241m.\u001B[39mtransform(\n\u001B[1;32m   2390\u001B[0m         final_pipeline,\n\u001B[1;32m   2391\u001B[0m         patch_config(\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m   2394\u001B[0m         ),\n\u001B[1;32m   2395\u001B[0m     )\n\u001B[0;32m-> 2397\u001B[0m \u001B[43m\u001B[49m\u001B[38;5;28;43;01mfor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43moutput\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01min\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mfinal_pipeline\u001B[49m\u001B[43m:\u001B[49m\n\u001B[1;32m   2398\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43;01myield\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43moutput\u001B[49m\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/runnables/base.py:1051\u001B[0m, in \u001B[0;36mRunnable.transform\u001B[0;34m(self, input, config, **kwargs)\u001B[0m\n\u001B[1;32m   1048\u001B[0m final: Input\n\u001B[1;32m   1049\u001B[0m got_first_val \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[0;32m-> 1051\u001B[0m \u001B[43m\u001B[49m\u001B[38;5;28;43;01mfor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mchunk\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01min\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43minput\u001B[39;49m\u001B[43m:\u001B[49m\n\u001B[1;32m   1052\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43;01mif\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mnot\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mgot_first_val\u001B[49m\u001B[43m:\u001B[49m\n\u001B[1;32m   1053\u001B[0m \u001B[43m        \u001B[49m\u001B[43mfinal\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m \u001B[49m\u001B[43mchunk\u001B[49m\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/runnables/base.py:4173\u001B[0m, in \u001B[0;36mRunnableBindingBase.transform\u001B[0;34m(self, input, config, **kwargs)\u001B[0m\n\u001B[1;32m   4167\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mtransform\u001B[39m(\n\u001B[1;32m   4168\u001B[0m     \u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m   4169\u001B[0m     \u001B[38;5;28minput\u001B[39m: Iterator[Input],\n\u001B[1;32m   4170\u001B[0m     config: Optional[RunnableConfig] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m   4171\u001B[0m     \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs: Any,\n\u001B[1;32m   4172\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m Iterator[Output]:\n\u001B[0;32m-> 4173\u001B[0m     \u001B[38;5;28;01myield from\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mbound\u001B[38;5;241m.\u001B[39mtransform(\n\u001B[1;32m   4174\u001B[0m         \u001B[38;5;28minput\u001B[39m,\n\u001B[1;32m   4175\u001B[0m         \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_merge_configs(config),\n\u001B[1;32m   4176\u001B[0m         \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39m{\u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mkwargs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs},\n\u001B[1;32m   4177\u001B[0m     )\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/runnables/base.py:1051\u001B[0m, in \u001B[0;36mRunnable.transform\u001B[0;34m(self, input, config, **kwargs)\u001B[0m\n\u001B[1;32m   1048\u001B[0m final: Input\n\u001B[1;32m   1049\u001B[0m got_first_val \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[0;32m-> 1051\u001B[0m \u001B[43m\u001B[49m\u001B[38;5;28;43;01mfor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mchunk\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01min\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43minput\u001B[39;49m\u001B[43m:\u001B[49m\n\u001B[1;32m   1052\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43;01mif\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mnot\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mgot_first_val\u001B[49m\u001B[43m:\u001B[49m\n\u001B[1;32m   1053\u001B[0m \u001B[43m        \u001B[49m\u001B[43mfinal\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m \u001B[49m\u001B[43mchunk\u001B[49m\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/runnables/base.py:1061\u001B[0m, in \u001B[0;36mRunnable.transform\u001B[0;34m(self, input, config, **kwargs)\u001B[0m\n\u001B[1;32m   1058\u001B[0m         final \u001B[38;5;241m=\u001B[39m final \u001B[38;5;241m+\u001B[39m chunk  \u001B[38;5;66;03m# type: ignore[operator]\u001B[39;00m\n\u001B[1;32m   1060\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m got_first_val:\n\u001B[0;32m-> 1061\u001B[0m     \u001B[38;5;28;01myield from\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mstream(final, config, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/runnables/base.py:577\u001B[0m, in \u001B[0;36mRunnable.stream\u001B[0;34m(self, input, config, **kwargs)\u001B[0m\n\u001B[1;32m    567\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mstream\u001B[39m(\n\u001B[1;32m    568\u001B[0m     \u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m    569\u001B[0m     \u001B[38;5;28minput\u001B[39m: Input,\n\u001B[1;32m    570\u001B[0m     config: Optional[RunnableConfig] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m    571\u001B[0m     \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs: Optional[Any],\n\u001B[1;32m    572\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m Iterator[Output]:\n\u001B[1;32m    573\u001B[0m \u001B[38;5;250m    \u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m    574\u001B[0m \u001B[38;5;124;03m    Default implementation of stream, which calls invoke.\u001B[39;00m\n\u001B[1;32m    575\u001B[0m \u001B[38;5;124;03m    Subclasses should override this method if they support streaming output.\u001B[39;00m\n\u001B[1;32m    576\u001B[0m \u001B[38;5;124;03m    \"\"\"\u001B[39;00m\n\u001B[0;32m--> 577\u001B[0m     \u001B[38;5;28;01myield\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43minvoke\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43minput\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mconfig\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/prompts/base.py:113\u001B[0m, in \u001B[0;36mBasePromptTemplate.invoke\u001B[0;34m(self, input, config)\u001B[0m\n\u001B[1;32m    111\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtags:\n\u001B[1;32m    112\u001B[0m     config[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtags\u001B[39m\u001B[38;5;124m\"\u001B[39m]\u001B[38;5;241m.\u001B[39mextend(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtags)\n\u001B[0;32m--> 113\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_call_with_config\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m    114\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_format_prompt_with_error_handling\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    115\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43minput\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[1;32m    116\u001B[0m \u001B[43m    \u001B[49m\u001B[43mconfig\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    117\u001B[0m \u001B[43m    \u001B[49m\u001B[43mrun_type\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mprompt\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[1;32m    118\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/runnables/base.py:1262\u001B[0m, in \u001B[0;36mRunnable._call_with_config\u001B[0;34m(self, func, input, config, run_type, **kwargs)\u001B[0m\n\u001B[1;32m   1258\u001B[0m     context \u001B[38;5;241m=\u001B[39m copy_context()\n\u001B[1;32m   1259\u001B[0m     context\u001B[38;5;241m.\u001B[39mrun(var_child_runnable_config\u001B[38;5;241m.\u001B[39mset, child_config)\n\u001B[1;32m   1260\u001B[0m     output \u001B[38;5;241m=\u001B[39m cast(\n\u001B[1;32m   1261\u001B[0m         Output,\n\u001B[0;32m-> 1262\u001B[0m         \u001B[43mcontext\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrun\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m   1263\u001B[0m \u001B[43m            \u001B[49m\u001B[43mcall_func_with_variable_args\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m   1264\u001B[0m \u001B[43m            \u001B[49m\u001B[43mfunc\u001B[49m\u001B[43m,\u001B[49m\u001B[43m  \u001B[49m\u001B[38;5;66;43;03m# type: ignore[arg-type]\u001B[39;49;00m\n\u001B[1;32m   1265\u001B[0m \u001B[43m            \u001B[49m\u001B[38;5;28;43minput\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m  \u001B[49m\u001B[38;5;66;43;03m# type: ignore[arg-type]\u001B[39;49;00m\n\u001B[1;32m   1266\u001B[0m \u001B[43m            \u001B[49m\u001B[43mconfig\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m   1267\u001B[0m \u001B[43m            \u001B[49m\u001B[43mrun_manager\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m   1268\u001B[0m \u001B[43m            \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m   1269\u001B[0m \u001B[43m        \u001B[49m\u001B[43m)\u001B[49m,\n\u001B[1;32m   1270\u001B[0m     )\n\u001B[1;32m   1271\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mBaseException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[1;32m   1272\u001B[0m 
    run_manager\u001B[38;5;241m.\u001B[39mon_chain_error(e)\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/runnables/config.py:326\u001B[0m, in \u001B[0;36mcall_func_with_variable_args\u001B[0;34m(func, input, config, run_manager, **kwargs)\u001B[0m\n\u001B[1;32m    324\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m run_manager \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;129;01mand\u001B[39;00m accepts_run_manager(func):\n\u001B[1;32m    325\u001B[0m     kwargs[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mrun_manager\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m run_manager\n\u001B[0;32m--> 326\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mfunc\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43minput\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/prompts/base.py:103\u001B[0m, in \u001B[0;36mBasePromptTemplate._format_prompt_with_error_handling\u001B[0;34m(self, inner_input)\u001B[0m\n\u001B[1;32m     97\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m missing:\n\u001B[1;32m     98\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(\n\u001B[1;32m     99\u001B[0m         \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mInput to \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__class__\u001B[39m\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m is missing variables \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mmissing\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m. \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m    100\u001B[0m         \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m Expected: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39minput_variables\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m    101\u001B[0m         \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m Received: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mlist\u001B[39m(inner_input\u001B[38;5;241m.\u001B[39mkeys())\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m    102\u001B[0m     )\n\u001B[0;32m--> 103\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mformat_prompt\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43minner_input\u001B[49m\u001B[43m)\u001B[49m\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/prompts/chat.py:535\u001B[0m, in \u001B[0;36mBaseChatPromptTemplate.format_prompt\u001B[0;34m(self, **kwargs)\u001B[0m\n\u001B[1;32m    526\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mformat_prompt\u001B[39m(\u001B[38;5;28mself\u001B[39m, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs: Any) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m PromptValue:\n\u001B[1;32m    527\u001B[0m \u001B[38;5;250m    \u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m    528\u001B[0m \u001B[38;5;124;03m    Format prompt. Should return a PromptValue.\u001B[39;00m\n\u001B[1;32m    529\u001B[0m \u001B[38;5;124;03m    Args:\u001B[39;00m\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m    533\u001B[0m \u001B[38;5;124;03m        PromptValue.\u001B[39;00m\n\u001B[1;32m    534\u001B[0m \u001B[38;5;124;03m    \"\"\"\u001B[39;00m\n\u001B[0;32m--> 535\u001B[0m     messages \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mformat_messages\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    536\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m ChatPromptValue(messages\u001B[38;5;241m=\u001B[39mmessages)\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/prompts/chat.py:797\u001B[0m, in \u001B[0;36mChatPromptTemplate.format_messages\u001B[0;34m(self, **kwargs)\u001B[0m\n\u001B[1;32m    793\u001B[0m     result\u001B[38;5;241m.\u001B[39mextend([message_template])\n\u001B[1;32m    794\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(\n\u001B[1;32m    795\u001B[0m     message_template, (BaseMessagePromptTemplate, BaseChatPromptTemplate)\n\u001B[1;32m    796\u001B[0m ):\n\u001B[0;32m--> 797\u001B[0m     message \u001B[38;5;241m=\u001B[39m \u001B[43mmessage_template\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mformat_messages\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    798\u001B[0m     result\u001B[38;5;241m.\u001B[39mextend(message)\n\u001B[1;32m    799\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/prompts/chat.py:133\u001B[0m, in \u001B[0;36mMessagesPlaceholder.format_messages\u001B[0;34m(self, **kwargs)\u001B[0m\n\u001B[1;32m    128\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(value, \u001B[38;5;28mlist\u001B[39m):\n\u001B[1;32m    129\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\n\u001B[1;32m    130\u001B[0m         \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mvariable \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mvariable_name\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m should be a list of base messages, \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m    131\u001B[0m         \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mgot \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mvalue\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m    132\u001B[0m     )\n\u001B[0;32m--> 133\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mconvert_to_messages\u001B[49m\u001B[43m(\u001B[49m\u001B[43mvalue\u001B[49m\u001B[43m)\u001B[49m\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/messages/__init__.py:247\u001B[0m, in \u001B[0;36mconvert_to_messages\u001B[0;34m(messages)\u001B[0m\n\u001B[1;32m    236\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mconvert_to_messages\u001B[39m(\n\u001B[1;32m    237\u001B[0m     messages: Sequence[MessageLikeRepresentation],\n\u001B[1;32m    238\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m List[BaseMessage]:\n\u001B[1;32m    239\u001B[0m \u001B[38;5;250m    \u001B[39m\u001B[38;5;124;03m\"\"\"Convert a sequence of messages to a list of messages.\u001B[39;00m\n\u001B[1;32m    240\u001B[0m \n\u001B[1;32m    241\u001B[0m \u001B[38;5;124;03m    Args:\u001B[39;00m\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m    245\u001B[0m \u001B[38;5;124;03m        List of messages (BaseMessages).\u001B[39;00m\n\u001B[1;32m    246\u001B[0m \u001B[38;5;124;03m    \"\"\"\u001B[39;00m\n\u001B[0;32m--> 247\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43m[\u001B[49m\u001B[43m_convert_to_message\u001B[49m\u001B[43m(\u001B[49m\u001B[43mm\u001B[49m\u001B[43m)\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mfor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mm\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01min\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mmessages\u001B[49m\u001B[43m]\u001B[49m\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/messages/__init__.py:247\u001B[0m, in \u001B[0;36m<listcomp>\u001B[0;34m(.0)\u001B[0m\n\u001B[1;32m    236\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mconvert_to_messages\u001B[39m(\n\u001B[1;32m    237\u001B[0m     messages: Sequence[MessageLikeRepresentation],\n\u001B[1;32m    238\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m List[BaseMessage]:\n\u001B[1;32m    239\u001B[0m \u001B[38;5;250m    \u001B[39m\u001B[38;5;124;03m\"\"\"Convert a sequence of messages to a list of messages.\u001B[39;00m\n\u001B[1;32m    240\u001B[0m \n\u001B[1;32m    241\u001B[0m \u001B[38;5;124;03m    Args:\u001B[39;00m\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m    245\u001B[0m \u001B[38;5;124;03m        List of messages (BaseMessages).\u001B[39;00m\n\u001B[1;32m    246\u001B[0m \u001B[38;5;124;03m    \"\"\"\u001B[39;00m\n\u001B[0;32m--> 247\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m [\u001B[43m_convert_to_message\u001B[49m\u001B[43m(\u001B[49m\u001B[43mm\u001B[49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mfor\u001B[39;00m m \u001B[38;5;129;01min\u001B[39;00m messages]\n",
-      "File \u001B[0;32m~/Library/Caches/pypoetry/virtualenvs/planning-library-q8r1q5f_-py3.11/lib/python3.11/site-packages/langchain_core/messages/__init__.py:231\u001B[0m, in \u001B[0;36m_convert_to_message\u001B[0;34m(message)\u001B[0m\n\u001B[1;32m    227\u001B[0m     _message \u001B[38;5;241m=\u001B[39m _create_message_from_message_type(\n\u001B[1;32m    228\u001B[0m         msg_type, msg_content, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mmsg_kwargs\n\u001B[1;32m    229\u001B[0m     )\n\u001B[1;32m    230\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m--> 231\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mNotImplementedError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mUnsupported message type: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mtype\u001B[39m(message)\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m    233\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m _message\n",
-      "\u001B[0;31mNotImplementedError\u001B[0m: Unsupported message type: <class 'list'>"
+      "File \u001B[0;32m~/PycharmProjects/planning-library/planning_library/strategies/base_strategy.py:136\u001B[0m, in \u001B[0;36mBaseCustomStrategy._call\u001B[0;34m(self, inputs, run_manager)\u001B[0m\n\u001B[1;32m    124\u001B[0m name_to_tool_map, color_mapping \u001B[38;5;241m=\u001B[39m get_tools_maps(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtools)\n\u001B[1;32m    126\u001B[0m outputs \u001B[38;5;241m=\u001B[39m [\n\u001B[1;32m    127\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_return(output, intermediate_steps, run_manager\u001B[38;5;241m=\u001B[39mrun_manager)\n\u001B[1;32m    128\u001B[0m     \u001B[38;5;28;01mfor\u001B[39;00m output, intermediate_steps \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_run_strategy(\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m    133\u001B[0m     )\n\u001B[1;32m    134\u001B[0m ]\n\u001B[0;32m--> 136\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m {key: [output[key] \u001B[38;5;28;01mfor\u001B[39;00m output \u001B[38;5;129;01min\u001B[39;00m outputs] \u001B[38;5;28;01mfor\u001B[39;00m key \u001B[38;5;129;01min\u001B[39;00m \u001B[43moutputs\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;241;43m0\u001B[39;49m\u001B[43m]\u001B[49m}\n",
+      "\u001B[0;31mIndexError\u001B[0m: list index out of range"
      ]
     }
    ],
@@ -375,8 +350,7 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-04-16T16:41:39.550050Z",
-     "start_time": "2024-04-16T16:41:38.572797Z"
+     "start_time": "2024-04-17T17:01:51.064275Z"
     }
    },
    "id": "3511e290e2ab0d52"
diff --git a/planning_library/action_executors/base_action_executor.py b/planning_library/action_executors/base_action_executor.py
index 93ad76c..2183aa8 100644
--- a/planning_library/action_executors/base_action_executor.py
+++ b/planning_library/action_executors/base_action_executor.py
@@ -13,7 +13,12 @@ class BaseActionExecutor(ABC):
     def tools(self) -> Sequence[BaseTool]: ...
 
     @abstractmethod
-    def reset(self, actions: Optional[List[AgentAction]] = None, **kwargs) -> None:
+    def reset(
+        self,
+        actions: Optional[List[AgentAction]] = None,
+        run_manager: Optional[CallbackManager] = None,
+        **kwargs,
+    ) -> None:
         """Resets the current state. If actions are passed, will also execute them."""
         ...
 
diff --git a/planning_library/action_executors/default_action_executor.py b/planning_library/action_executors/default_action_executor.py
index cbe4430..7970fe5 100644
--- a/planning_library/action_executors/default_action_executor.py
+++ b/planning_library/action_executors/default_action_executor.py
@@ -15,7 +15,12 @@ class DefaultActionExecutor(BaseActionExecutor):
     def __init__(self, tools: Sequence[BaseTool]):
         self._tool_executor = ToolExecutor(tools)
 
-    def reset(self, actions: Optional[List[AgentAction]] = None, **kwargs) -> None:
+    def reset(
+        self,
+        actions: Optional[List[AgentAction]] = None,
+        run_manager: Optional[CallbackManager] = None,
+        **kwargs,
+    ) -> None:
         """Resets the current state. If actions are passed, will also execute them.
 
         This action executor doesn't have a state by default, so this method doesn't do anything.
diff --git a/planning_library/action_executors/gymnasium_action_executor.py b/planning_library/action_executors/gymnasium_action_executor.py
index e0b710c..b8c1ad2 100644
--- a/planning_library/action_executors/gymnasium_action_executor.py
+++ b/planning_library/action_executors/gymnasium_action_executor.py
@@ -25,13 +25,21 @@ def __init__(
     def tools(self) -> Sequence[BaseTool]:
         return self._env.get_wrapper_attr("tools")
 
-    def reset(self, actions: Optional[List[AgentAction]] = None, **kwargs) -> None:
+    def reset(
+        self,
+        actions: Optional[List[AgentAction]] = None,
+        run_manager: Optional[CallbackManager] = None,
+        **kwargs,
+    ) -> None:
         """Resets the environment. If actions are passed, will also execute them."""
 
         options = kwargs
         if actions:
             options["trajectory"] = actions
 
+        if run_manager:
+            options["run_manager"] = run_manager
+
         self._env.reset(seed=self._seed, options=options)
 
     @overload
diff --git a/planning_library/components/agent_component.py b/planning_library/components/agent_component.py
index e19f105..0127603 100644
--- a/planning_library/components/agent_component.py
+++ b/planning_library/components/agent_component.py
@@ -98,17 +98,18 @@ def add_output_preprocessing(
             )
 
     def invoke(
-        self,
-        inputs: InputType,
-        run_manager: Optional[CallbackManager] = None,
+        self, inputs: InputType, run_manager: Optional[CallbackManager] = None, **kwargs
     ) -> Union[List[AgentAction], AgentAction, AgentFinish]:
+        # TODO: no way to pass name to plan?
         return self.agent.plan(**inputs, callbacks=run_manager)
 
     async def ainvoke(
         self,
         inputs: InputType,
         run_manager: Optional[AsyncCallbackManager] = None,
+        **kwargs,
     ) -> Union[List[AgentAction], AgentAction, AgentFinish]:
+        # TODO: no way to pass name to plan?
         outputs = await self.agent.aplan(**inputs, callbacks=run_manager)
         return outputs
 
diff --git a/planning_library/components/base_component.py b/planning_library/components/base_component.py
index 95f2e40..c6c2a7c 100644
--- a/planning_library/components/base_component.py
+++ b/planning_library/components/base_component.py
@@ -8,6 +8,7 @@
 
 
 class BaseComponent(Generic[InputType, OutputType], ABC):
+    name: Optional[str] = None
     required_prompt_input_vars: Set[str] = set()
 
     @classmethod
@@ -56,9 +57,7 @@ def add_output_preprocessing(
 
     @abstractmethod
     def invoke(
-        self,
-        inputs: InputType,
-        run_manager: Optional[CallbackManager] = None,
+        self, inputs: InputType, run_manager: Optional[CallbackManager] = None, **kwargs
     ) -> OutputType: ...
 
     @abstractmethod
@@ -66,4 +65,5 @@ async def ainvoke(
         self,
         inputs: InputType,
         run_manager: Optional[AsyncCallbackManager] = None,
+        **kwargs,
     ) -> OutputType: ...
diff --git a/planning_library/components/evaluation/evaluator_component.py b/planning_library/components/evaluation/evaluator_component.py
index 729eb10..a586c35 100644
--- a/planning_library/components/evaluation/evaluator_component.py
+++ b/planning_library/components/evaluation/evaluator_component.py
@@ -37,11 +37,12 @@ def add_output_preprocessing(
         self.judge.add_output_preprocessing(preprocess, apreprocess)
 
     def invoke(
-        self,
-        inputs: InputType,
-        run_manager: Optional[CallbackManager] = None,
+        self, inputs: InputType, run_manager: Optional[CallbackManager] = None, **kwargs
     ) -> bool:
-        backbone_output = self.backbone.invoke(inputs, run_manager)
+        if "run_name" not in kwargs and self.name:
+            kwargs["run_name"] = self.name
+
+        backbone_output = self.backbone.invoke(inputs, run_manager, **kwargs)
         should_continue = self.judge.invoke(
             {"backbone_output": backbone_output}, run_manager
         )
@@ -51,8 +52,12 @@ async def ainvoke(
         self,
         inputs: InputType,
         run_manager: Optional[AsyncCallbackManager] = None,
+        **kwargs,
     ) -> bool:
-        backbone_output = await self.backbone.ainvoke(inputs, run_manager)
+        if "run_name" not in kwargs and self.name:
+            kwargs["run_name"] = self.name
+
+        backbone_output = await self.backbone.ainvoke(inputs, run_manager, **kwargs)
         should_continue = await self.judge.ainvoke(
             {"backbone_output": backbone_output}, run_manager
         )
diff --git a/planning_library/components/evaluation/threshold_judge.py b/planning_library/components/evaluation/threshold_judge.py
index af3b451..84bd780 100644
--- a/planning_library/components/evaluation/threshold_judge.py
+++ b/planning_library/components/evaluation/threshold_judge.py
@@ -9,9 +9,7 @@ def __init__(self, threshold: float):
         self.threshold = threshold
 
     def invoke(
-        self,
-        inputs: InputType,
-        run_manager: Optional[CallbackManager] = None,
+        self, inputs: InputType, run_manager: Optional[CallbackManager] = None, **kwargs
     ) -> bool:
         return inputs["backbone_output"] <= self.threshold
 
@@ -19,6 +17,7 @@ async def ainvoke(
         self,
         inputs: InputType,
         run_manager: Optional[AsyncCallbackManager] = None,
+        **kwargs,
     ) -> bool:
         return inputs["backbone_output"] <= self.threshold
 
@@ -28,9 +27,7 @@ def __init__(self, threshold: float):
         self.threshold = threshold
 
     def invoke(
-        self,
-        inputs: InputType,
-        run_manager: Optional[CallbackManager] = None,
+        self, inputs: InputType, run_manager: Optional[CallbackManager] = None, **kwargs
     ) -> bool:
         return inputs["backbone_output"] >= self.threshold
 
@@ -38,5 +35,6 @@ async def ainvoke(
         self,
         inputs: InputType,
         run_manager: Optional[AsyncCallbackManager] = None,
+        **kwargs,
     ) -> bool:
         return inputs["backbone_output"] >= self.threshold
diff --git a/planning_library/components/runnable_component.py b/planning_library/components/runnable_component.py
index e9fd69c..e8dd0ae 100644
--- a/planning_library/components/runnable_component.py
+++ b/planning_library/components/runnable_component.py
@@ -43,13 +43,18 @@ def add_output_preprocessing(
         self.runnable = self.runnable | RunnableLambda(preprocess, afunc=apreprocess)
 
     def invoke(
-        self,
-        inputs: InputType,
-        run_manager: Optional[CallbackManager] = None,
+        self, inputs: InputType, run_manager: Optional[CallbackManager] = None, **kwargs
     ) -> OutputType:
+        config = kwargs
+        if "callbacks" not in config and run_manager:
+            config["callbacks"] = run_manager
+
+        if "run_name" not in config and self.name:
+            config["run_name"] = self.name
+
         outputs = self.runnable.invoke(
             inputs,
-            config={"callbacks": run_manager} if run_manager else {},
+            config=config,  # type: ignore[arg-type]
         )
         return outputs
 
@@ -57,9 +62,17 @@ async def ainvoke(
         self,
         inputs: InputType,
         run_manager: Optional[AsyncCallbackManager] = None,
+        **kwargs,
     ) -> OutputType:
+        config = kwargs
+        if "callbacks" not in config and run_manager:
+            config["callbacks"] = run_manager
+
+        if "run_name" not in config and self.name:
+            config["run_name"] = self.name
+
         outputs = await self.runnable.ainvoke(
             inputs,
-            config={"callbacks": run_manager} if run_manager else {},
+            config=config,  # type: ignore[arg-type]
         )
         return outputs
diff --git a/planning_library/strategies/reflexion/components/actor.py b/planning_library/strategies/reflexion/components/actor.py
index 376e201..b5dc49b 100644
--- a/planning_library/strategies/reflexion/components/actor.py
+++ b/planning_library/strategies/reflexion/components/actor.py
@@ -27,6 +27,8 @@ class ReflexionActor(AgentComponent[ReflexionActorInput]):
     be initialized with only a single user message.
     """
 
+    name = "Actor"
+
     required_prompt_input_vars = set(ReflexionActorInput.__annotations__) - {
         "inputs",
         "intermediate_steps",
diff --git a/planning_library/strategies/reflexion/components/evaluator.py b/planning_library/strategies/reflexion/components/evaluator.py
index 6f760d4..fb3abd5 100644
--- a/planning_library/strategies/reflexion/components/evaluator.py
+++ b/planning_library/strategies/reflexion/components/evaluator.py
@@ -30,6 +30,8 @@ class PreprocessedReflexionEvaluatorInput(TypedDict):
 class ReflexionEvaluator(
     Generic[OutputType], EvaluatorComponent[ReflexionEvaluatorInput, OutputType]
 ):
+    name = "Evaluator"
+
     required_prompt_input_vars = set(ReflexionEvaluatorInput.__annotations__) - {
         "inputs"
     }
diff --git a/planning_library/strategies/reflexion/components/self_reflection.py b/planning_library/strategies/reflexion/components/self_reflection.py
index 8f8aaee..fc19d0a 100644
--- a/planning_library/strategies/reflexion/components/self_reflection.py
+++ b/planning_library/strategies/reflexion/components/self_reflection.py
@@ -28,6 +28,8 @@ class PreprocessedReflexionSelfReflectionInput(TypedDict):
 class ReflexionSelfReflection(
     RunnableComponent[ReflexionSelfReflectionInput, Sequence[BaseMessage]]
 ):
+    name = "Self-Reflection"
+
     required_prompt_input_vars = set(ReflexionSelfReflectionInput.__annotations__) - {
         "inputs"
     }
diff --git a/planning_library/strategies/tot_dfs/components/thought_evaluator.py b/planning_library/strategies/tot_dfs/components/thought_evaluator.py
index 12f2f24..0b6261c 100644
--- a/planning_library/strategies/tot_dfs/components/thought_evaluator.py
+++ b/planning_library/strategies/tot_dfs/components/thought_evaluator.py
@@ -7,7 +7,9 @@
 from langchain_core.output_parsers import BaseOutputParser
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.runnables import Runnable
-
+from planning_library.strategies.tot_dfs.utils.format_agent_outputs import (
+    format_thought,
+)
 from textwrap import dedent
 from planning_library.function_calling_parsers import (
     ParserRegistry,
@@ -45,6 +47,8 @@ class ThoughtEvaluatorInput(TypedDict):
 class ThoughtEvaluator(
     Generic[OutputType], EvaluatorComponent[ThoughtEvaluatorInput, OutputType]
 ):
+    name = "Evaluate Thoughts"
+
     required_prompt_input_vars = set(ThoughtEvaluatorInput.__annotations__) - {"inputs"}
 
     @classmethod
@@ -65,12 +69,11 @@ def _create_default_prompt(
                     user_message,
                 ),
                 MessagesPlaceholder("intermediate_steps"),
+                ("human", "Here is the proposed next step:"),
+                MessagesPlaceholder("next_thought"),
                 (
                     "human",
                     dedent("""
-                     Here is the proposed next step: 
-                     {next_thought}
-                     
                      Your goal is to judge whether this proposal should be followed or discarded, 
                      how likely it is to lead to the success.
                      
@@ -114,7 +117,7 @@ def _preprocess_input(
 
             return {
                 **inputs["inputs"],
-                "next_thought": inputs["next_thought"],
+                "next_thought": format_thought(inputs["next_thought"]),
                 "intermediate_steps": intermediate_steps,
             }
 
diff --git a/planning_library/strategies/tot_dfs/components/thought_generator.py b/planning_library/strategies/tot_dfs/components/thought_generator.py
index fe827ff..b10fd58 100644
--- a/planning_library/strategies/tot_dfs/components/thought_generator.py
+++ b/planning_library/strategies/tot_dfs/components/thought_generator.py
@@ -13,6 +13,9 @@
     BaseFunctionCallingSingleActionParser,
     BaseFunctionCallingMultiActionParser,
 )
+from planning_library.strategies.tot_dfs.utils.format_agent_outputs import (
+    format_thoughts,
+)
 from dataclasses import dataclass
 
 
@@ -48,6 +51,8 @@ class ThoughtGenerator(
         ThoughtGeneratorInput, List[Union[List[AgentAction], AgentAction, AgentFinish]]
     ]
 ):
+    name = "Generate Thoughts"
+
     required_prompt_input_vars = set(ThoughtGeneratorInput.__annotations__) - {
         "inputs",
         "intermediate_steps",
@@ -94,12 +99,14 @@ def invoke(
         self,
         inputs: ThoughtGeneratorInput,
         run_manager: Optional[CallbackManager] = None,
+        **kwargs,
     ) -> List[List[AgentAction] | AgentAction | AgentFinish]:
         results: List[List[AgentAction] | AgentAction | AgentFinish] = []
         for _ in range(self.max_num_thoughts):
             cur_result = self.agent.invoke(
                 {**inputs, "previous_thoughts": results},
                 run_manager=run_manager,
+                **kwargs,
             )
             # TODO: how to fix mypy warning properly here?
             results.append(cur_result)  # type: ignore[arg-type]
@@ -110,12 +117,14 @@ async def ainvoke(
         self,
         inputs: ThoughtGeneratorInput,
         run_manager: Optional[AsyncCallbackManager] = None,
+        **kwargs,
     ) -> List[List[AgentAction] | AgentAction | AgentFinish]:
         results: List[List[AgentAction] | AgentAction | AgentFinish] = []
         for _ in range(self.max_num_thoughts):
             cur_result = await self.agent.ainvoke(
                 {**inputs, "previous_thoughts": results},
                 run_manager=run_manager,
+                **kwargs,
             )
             # TODO: how to fix mypy warning properly here?
             results.append(cur_result)  # type: ignore[arg-type]
@@ -174,4 +183,13 @@ def create(
             parser=parser,
             parser_name=parser_name,
         )
+
+        # Pass previously generated thoughts through format_thoughts first.
+        agent.add_input_preprocessing(
+            preprocess=lambda inputs: {
+                **inputs,
+                "previous_thoughts": format_thoughts(inputs["previous_thoughts"]),
+            }
+        )
+
         return ThoughtGenerator(agent=agent, max_num_thoughts=max_num_thoughts)
diff --git a/planning_library/strategies/tot_dfs/components/thought_sorter.py b/planning_library/strategies/tot_dfs/components/thought_sorter.py
index 963b1bf..18ba2b3 100644
--- a/planning_library/strategies/tot_dfs/components/thought_sorter.py
+++ b/planning_library/strategies/tot_dfs/components/thought_sorter.py
@@ -5,6 +5,7 @@
 from langchain_core.agents import AgentAction, AgentFinish
 from langchain_core.language_models import BaseChatModel
 from langchain_core.output_parsers import BaseOutputParser
+from langchain_core.messages import BaseMessage
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.runnables import Runnable
 from itertools import combinations
@@ -18,6 +19,9 @@
 from typing_extensions import TypedDict
 from dataclasses import dataclass
 from collections import defaultdict
+from planning_library.strategies.tot_dfs.utils.format_agent_outputs import (
+    format_thought,
+)
 
 
 @dataclass
@@ -45,8 +49,8 @@ class ThoughtSorterInput(TypedDict):
 
 class ThoughtSorterRunnableInput(TypedDict):
     intermediate_steps: List[Tuple[AgentAction, str]]
-    thought1: List[AgentAction] | AgentAction | AgentFinish
-    thought2: List[AgentAction] | AgentAction | AgentFinish
+    thought1: List[BaseMessage]
+    thought2: List[BaseMessage]
 
 
 class ThoughtSorter(
@@ -65,6 +69,8 @@ class ThoughtSorter(
     https://github.com/OpenBMB/ToolBench/blob/2937497244096960a532b21f66f663ed78e08588/toolbench/inference/LLM_rank/rank_candidate.py#L53
     """
 
+    name = "Sort Thoughts"
+
     def __init__(
         self,
         runnable: Runnable[ThoughtSorterRunnableInput, str]
@@ -92,16 +98,13 @@ def _create_default_prompt(
                     user_message,
                 ),
                 MessagesPlaceholder("intermediate_steps"),
+                ("human", "Here is the first proposed next step:"),
+                MessagesPlaceholder("thought1"),
+                ("human", "Here is the second proposed next step:"),
+                MessagesPlaceholder("thought2"),
                 (
                     "human",
-                    dedent("""
-                         Here is the first proposed next step: 
-                         {thought1}
-                         
-                         Here is the second proposed next step:
-                         {thought2}
-
-                         Your goal is to judge which of the proposed actions is more likely to lead to the success.
+                    dedent("""\n                         Your goal is to judge which of the proposed actions is more likely to lead to the success.
 
                          Take your time and comment your decision, 
                          but make sure to always output either 1 or 2, 
@@ -123,15 +126,17 @@ def _compare_pairwise(
         thought1: List[AgentAction] | AgentAction | AgentFinish,
         thought2: List[AgentAction] | AgentAction | AgentFinish,
         run_manager: Optional[CallbackManager] = None,
+        **kwargs,
     ) -> str:
         return self.runnable.invoke(
             {
                 **inputs,  # type: ignore[typeddict-item]
                 "intermediate_steps": intermediate_steps,
-                "thought1": thought1,
-                "thought2": thought2,
+                "thought1": format_thought(thought1),
+                "thought2": format_thought(thought2),
             },
             run_manager=run_manager,
+            **kwargs,
         )
 
     async def _acompare_pairwise(
@@ -141,13 +146,15 @@ async def _acompare_pairwise(
         thought1: List[AgentAction] | AgentAction | AgentFinish,
         thought2: List[AgentAction] | AgentAction | AgentFinish,
         run_manager: Optional[AsyncCallbackManager] = None,
+        **kwargs,
     ) -> str:
         return await self.runnable.ainvoke(
             {
                 **inputs,  # type: ignore[typeddict-item]
                 "intermediate_steps": intermediate_steps,
-                "thought1": thought1,
-                "thought2": thought2,
+                "thought1": format_thought(thought1),
+                "thought2": format_thought(thought2),
             },
             run_manager=run_manager,
+            **kwargs,  # forwarded for parity with the sync _compare_pairwise
         )
@@ -156,6 +163,7 @@ def invoke(
         self,
         inputs: ThoughtSorterInput,
         run_manager: Optional[CallbackManager] = None,
+        **kwargs,
     ) -> List[Union[List[AgentAction], AgentAction, AgentFinish]]:
         scores: Dict[Union[List[AgentAction], AgentAction, AgentFinish], float] = (
             defaultdict(float)
@@ -169,6 +177,7 @@ def invoke(
                 thought1=thought1,
                 thought2=thought2,
                 run_manager=run_manager,
+                **kwargs,
             )
 
             if result == "1":
@@ -186,6 +195,7 @@ async def ainvoke(
         self,
         inputs: ThoughtSorterInput,
         run_manager: Optional[AsyncCallbackManager] = None,
+        **kwargs,
     ) -> List[Union[List[AgentAction], AgentAction, AgentFinish]]:
         scores: Dict[Union[List[AgentAction], AgentAction, AgentFinish], float] = (
             defaultdict(float)
diff --git a/planning_library/strategies/tot_dfs/utils/__init__.py b/planning_library/strategies/tot_dfs/utils/__init__.py
index f46b30e..d512cf5 100644
--- a/planning_library/strategies/tot_dfs/utils/__init__.py
+++ b/planning_library/strategies/tot_dfs/utils/__init__.py
@@ -1,4 +1,3 @@
-from .evaluator_input import EvaluatorInput
 from .tot_node import ToTNode
 
-__all__ = ["ToTNode", "EvaluatorInput"]
+__all__ = ["ToTNode"]
diff --git a/planning_library/strategies/tot_dfs/utils/evaluator_input.py b/planning_library/strategies/tot_dfs/utils/evaluator_input.py
deleted file mode 100644
index c05ac38..0000000
--- a/planning_library/strategies/tot_dfs/utils/evaluator_input.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from typing import Dict, List, Tuple, Union
-
-from langchain_core.agents import AgentAction, AgentFinish
-from typing_extensions import TypedDict
-
-
-class EvaluatorInput(TypedDict):
-    inputs: Dict[str, str]
-    trajectory: List[Tuple[AgentAction, str]]
-    next_thought: Union[List[AgentAction], AgentAction, AgentFinish]
diff --git a/planning_library/strategies/tot_dfs/utils/format_agent_outputs.py b/planning_library/strategies/tot_dfs/utils/format_agent_outputs.py
new file mode 100644
index 0000000..9a8bdb6
--- /dev/null
+++ b/planning_library/strategies/tot_dfs/utils/format_agent_outputs.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from typing import List
+from langchain_core.agents import AgentAction, AgentFinish
+from langchain_core.messages import BaseMessage, AIMessage
+
+
+def format_thought(
+    thought: List[AgentAction] | AgentAction | AgentFinish,
+) -> List[BaseMessage]:
+    """Convert a single thought into a flat list of AI messages.
+
+    A list is expanded recursively, item by item, in order.
+
+    Raises:
+        ValueError: If ``thought`` is not a list, AgentAction or AgentFinish.
+    """
+    if isinstance(thought, list):
+        return [msg for item in thought for msg in format_thought(item)]
+
+    if isinstance(thought, AgentAction):
+        text = f"Call tool `{thought.tool}` with arguments `{thought.tool_input}`"
+        return [AIMessage(content=text)]
+
+    if isinstance(thought, AgentFinish):
+        text = f"Finish execution with return values `{thought.return_values}`"
+        return [AIMessage(content=text)]
+
+    raise ValueError(f"Unexpected type for `thought`: {type(thought)}")
+
+
+def format_thoughts(
+    thoughts: List[List[AgentAction] | AgentAction | AgentFinish],
+) -> List[BaseMessage]:
+    """Concatenate the messages of every thought, preserving their order."""
+    return [
+        message for thought in thoughts for message in format_thought(thought)
+    ]