Merge pull request #13 from gnosis/evan/benchmark

Move benchmarking from evo.researcher
gnosis · Feb 15, 2024 · 2741963 · 2741963
2 parents 878ffea + 97c46bf
commit 2741963
Show file tree

Hide file tree

Showing 8 changed files with 1,534 additions and 5 deletions.
diff --git a/mypy.ini b/mypy.ini
@@ -1,5 +1,5 @@
 [mypy]
-python_version = 3.9
+python_version = 3.10
 files = prediction_market_agent_tooling/, tests/, examples/, scripts/
 plugins = pydantic.mypy
 warn_redundant_casts = True

diff --git a/poetry.lock b/poetry.lock
diff --git a/prediction_market_agent_tooling/benchmark/__init__.py b/prediction_market_agent_tooling/benchmark/__init__.py
diff --git a/prediction_market_agent_tooling/benchmark/agents.py b/prediction_market_agent_tooling/benchmark/agents.py
@@ -0,0 +1,86 @@
+import random
+import typing as t
+
+from prediction_market_agent_tooling.benchmark.utils import (
+    EvaluatedQuestion,
+    OutcomePrediction,
+    Prediction,
+)
+
+
+class AbstractBenchmarkedAgent:
+    """Base class for benchmarked agents.
+
+    Subclasses override ``evaluate``, ``research`` and ``predict``;
+    ``evaluate_research_predict`` chains the three steps for one question.
+    """
+
+    def __init__(self, agent_name: str, max_workers: t.Optional[int] = None):
+        """Store the agent's name and its optional worker limit.
+
+        Args:
+            agent_name: Name of this agent.
+            max_workers: Limit on the number of parallel threads that may run
+                this agent. Stored as-is; ``None`` is the default.
+        """
+        self.agent_name = agent_name
+        self.max_workers = max_workers
+
+    def evaluate(self, market_question: str) -> EvaluatedQuestion:
+        """Evaluate ``market_question``; must be overridden by subclasses.
+
+        The returned object's ``is_predictable`` attribute is read by
+        ``evaluate_research_predict``.
+
+        Raises:
+            NotImplementedError: Always, in this base class.
+        """
+        raise NotImplementedError
+
+    def research(self, market_question: str) -> t.Optional[str]:
+        """Research ``market_question``; must be overridden by subclasses.
+
+        Returning ``None`` makes ``evaluate_research_predict`` skip the
+        prediction step.
+
+        Raises:
+            NotImplementedError: Always, in this base class.
+        """
+        raise NotImplementedError
+
+    def predict(
+        self, market_question: str, researched: str, evaluated: EvaluatedQuestion
+    ) -> Prediction:
+        """Predict the outcome; must be overridden by subclasses.
+
+        Args:
+            market_question: The question being predicted.
+            researched: The non-``None`` result of ``research``.
+            evaluated: The result of ``evaluate``.
+
+        Raises:
+            NotImplementedError: Always, in this base class.
+        """
+        raise NotImplementedError
+
+    def evaluate_research_predict(self, market_question: str) -> Prediction:
+        """Run evaluate, then research, then predict for one question.
+
+        Returns a ``Prediction`` carrying only the evaluation when the
+        question is not predictable or when research returns ``None``;
+        otherwise returns whatever ``predict`` produces.
+        """
+        # Named `evaluation` (not `eval`) so the builtin is not shadowed.
+        evaluation = self.evaluate(market_question=market_question)
+        if not evaluation.is_predictable:
+            return Prediction(evaluation=evaluation)
+        researched = self.research(market_question=market_question)
+        if researched is None:
+            return Prediction(evaluation=evaluation)
+        return self.predict(
+            market_question=market_question,
+            researched=researched,
+            evaluated=evaluation,
+        )
+
+
+class RandomAgent(AbstractBenchmarkedAgent):
+    """Agent whose predictions are drawn from ``random.random()``."""
+
+    def evaluate(self, market_question: str) -> EvaluatedQuestion:
+        """Mark every question as predictable."""
+        return EvaluatedQuestion(question=market_question, is_predictable=True)
+
+    def research(self, market_question: str) -> str:
+        """Return an empty string: no research is done, but the result can't be None."""
+        return ""
+
+    def predict(
+        self, market_question: str, researched: str, evaluated: EvaluatedQuestion
+    ) -> Prediction:
+        """Build a prediction with random ``p_yes`` and ``confidence``.
+
+        ``market_question`` and ``researched`` are not used.
+        """
+        # Two separate draws, p_yes first, then confidence.
+        yes_probability = random.random()
+        confidence_level = random.random()
+        outcome = OutcomePrediction(
+            p_yes=yes_probability,
+            confidence=confidence_level,
+            info_utility=None,
+        )
+        return Prediction(evaluation=evaluated, outcome_prediction=outcome)
+
+
+class FixedAgent(AbstractBenchmarkedAgent):
+    """Agent that always predicts the same answer with full confidence."""
+
+    def __init__(
+        self, fixed_answer: bool, agent_name: str, max_workers: t.Optional[int] = None
+    ):
+        """Store the fixed answer and forward the rest to the base class.
+
+        Args:
+            fixed_answer: ``True`` to always predict yes, ``False`` for no.
+            agent_name: Name of this agent.
+            max_workers: Optional worker limit, passed to the base class.
+        """
+        # `t.Optional[int]` matches the base-class signature and, unlike
+        # `int | None`, does not fail at definition time on Python < 3.10.
+        super().__init__(agent_name, max_workers)
+        self.fixed_answer = fixed_answer
+
+    def evaluate(self, market_question: str) -> EvaluatedQuestion:
+        """Mark every question as predictable."""
+        return EvaluatedQuestion(question=market_question, is_predictable=True)
+
+    def research(self, market_question: str) -> str:
+        """Return an empty string: no research is done, but the result can't be None."""
+        return ""  # No research for a fixed agent, but can't be None.
+
+    def predict(
+        self, market_question: str, researched: str, evaluated: EvaluatedQuestion
+    ) -> Prediction:
+        """Build a prediction from the fixed answer.
+
+        ``p_yes`` is 1.0 when ``fixed_answer`` is true, otherwise 0.0;
+        ``confidence`` is always 1.0. ``market_question`` and ``researched``
+        are not used.
+        """
+        # Two statements instead of `a if c else b, 1.0`, whose precedence
+        # is easy to misread.
+        p_yes = 1.0 if self.fixed_answer else 0.0
+        confidence = 1.0
+        return Prediction(
+            evaluation=evaluated,
+            outcome_prediction=OutcomePrediction(
+                p_yes=p_yes,
+                confidence=confidence,
+                info_utility=None,
+            ),
+        )