[eval,fix]: metrics get carried across eval instances (All-Hands-AI#3072)

* fix: make max_budget_per_task optional in `run_agent_controller`
* update arg for each run infer
* fix: metrics logging carried along; reset llm metric with the agent;

---------

Co-authored-by: Graham Neubig <neubig@gmail.com>
invariantlabs-ai · Jul 23, 2024 · 41a8bb3
1 parent da17665
commit 41a8bb3
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 2 deletions.
diff --git a/opendevin/controller/agent.py b/opendevin/controller/agent.py
@@ -57,6 +57,9 @@ def reset(self) -> None:
         # TODO clear history
         self._complete = False
 
+        if self.llm:
+            self.llm.reset()
+
     @property
     def name(self):
         return self.__class__.__name__

diff --git a/opendevin/controller/state/state.py b/opendevin/controller/state/state.py
@@ -97,9 +97,9 @@ class State:
     resume_state: AgentState | None = None
     traffic_control_state: TrafficControlState = TrafficControlState.NORMAL
     # global metrics for the current task
-    metrics: Metrics = Metrics()
+    metrics: Metrics = field(default_factory=Metrics)
     # local metrics for the current subtask
-    local_metrics: Metrics = Metrics()
+    local_metrics: Metrics = field(default_factory=Metrics)
     # root agent has level 0, and every delegate increases the level by one
     delegate_level: int = 0
     # start_id and end_id track the range of events in history

diff --git a/opendevin/llm/llm.py b/opendevin/llm/llm.py
@@ -252,3 +252,6 @@ def __str__(self):
 
     def __repr__(self):
         return str(self)
+
+    def reset(self):
+        self.metrics = Metrics()