Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move benchmarking from evo.researcher #13

Merged
merged 9 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
553 changes: 552 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

Empty file.
86 changes: 86 additions & 0 deletions prediction_market_agent_tooling/benchmark/agents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import random
import typing as t

from prediction_market_agent_tooling.benchmark.utils import (
EvalautedQuestion,
OutcomePrediction,
Prediction,
)


class AbstractBenchmarkedAgent:
    """Interface for an agent that answers market questions in three steps.

    Subclasses implement ``evaluate``, ``research`` and ``predict``;
    ``evaluate_research_predict`` chains them and short-circuits when a
    step reports that no prediction can be made.
    """

    def __init__(self, agent_name: str, max_workers: t.Optional[int] = None):
        """Store the agent's name and its optional worker limit.

        Args:
            agent_name: Name identifying this agent.
            max_workers: Limit on the number of workers that can run this
                agent in parallel threads. Defaults to ``None``.
        """
        self.agent_name = agent_name
        self.max_workers = max_workers

    def evaluate(self, market_question: str) -> EvalautedQuestion:
        """Evaluate the question; must be implemented by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def research(self, market_question: str) -> t.Optional[str]:
        """Research the question; must be implemented by subclasses.

        A ``None`` result makes ``evaluate_research_predict`` skip the
        prediction step.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def predict(
        self, market_question: str, researched: str, evaluated: EvalautedQuestion
    ) -> Prediction:
        """Produce a prediction; must be implemented by subclasses.

        Args:
            market_question: The question being predicted.
            researched: The value returned by ``research``.
            evaluated: The value returned by ``evaluate``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def evaluate_research_predict(self, market_question: str) -> Prediction:
        """Run ``evaluate``, ``research`` and ``predict`` in sequence.

        Returns:
            A ``Prediction`` carrying only the evaluation when the question
            is not predictable or when research yields ``None``; otherwise
            whatever ``predict`` returns.
        """
        # Named ``evaluation`` rather than ``eval`` to avoid shadowing the builtin.
        evaluation = self.evaluate(market_question=market_question)
        if not evaluation.is_predictable:
            return Prediction(evaluation=evaluation)

        researched = self.research(market_question=market_question)
        if researched is None:
            return Prediction(evaluation=evaluation)

        return self.predict(
            market_question=market_question,
            researched=researched,
            evaluated=evaluation,
        )


class RandomAgent(AbstractBenchmarkedAgent):
    """Agent that answers every question with randomly drawn numbers."""

    def evaluate(self, market_question: str) -> EvalautedQuestion:
        """Report the question as predictable without inspecting it."""
        return EvalautedQuestion(
            question=market_question,
            is_predictable=True,
        )

    def research(self, market_question: str) -> str:
        """Return an empty string: there is no research, but it can't be None."""
        return ""

    def predict(
        self, market_question: str, researched: str, evaluated: EvalautedQuestion
    ) -> Prediction:
        """Build a prediction from two ``random.random()`` draws.

        ``market_question`` and ``researched`` are not used.
        """
        # Keyword arguments are evaluated left to right, so p_yes is drawn
        # before confidence.
        outcome = OutcomePrediction(
            p_yes=random.random(),
            confidence=random.random(),
            info_utility=None,
        )
        return Prediction(evaluation=evaluated, outcome_prediction=outcome)


class FixedAgent(AbstractBenchmarkedAgent):
    """Agent that always predicts the same, fully confident answer."""

    def __init__(
        self,
        fixed_answer: bool,
        agent_name: str,
        # ``t.Optional`` matches the base-class signature and, unlike
        # ``int | None``, does not need Python 3.10+ to be evaluated.
        max_workers: t.Optional[int] = None,
    ):
        """Store the answer to give and initialise the base agent.

        Args:
            fixed_answer: ``True`` to always predict yes, ``False`` for no.
            agent_name: Name identifying this agent.
            max_workers: Forwarded unchanged to the base class.
        """
        super().__init__(agent_name, max_workers)
        self.fixed_answer = fixed_answer

    def evaluate(self, market_question: str) -> EvalautedQuestion:
        """Report the question as predictable without inspecting it."""
        return EvalautedQuestion(question=market_question, is_predictable=True)

    def research(self, market_question: str) -> str:
        """Return an empty string: there is no research, but it can't be None."""
        return ""

    def predict(
        self, market_question: str, researched: str, evaluated: EvalautedQuestion
    ) -> Prediction:
        """Return the fixed answer with confidence 1.0.

        ``p_yes`` is 1.0 when ``fixed_answer`` is truthy and 0.0 otherwise.
        ``market_question`` and ``researched`` are not used.
        """
        p_yes = 1.0 if self.fixed_answer else 0.0
        return Prediction(
            evaluation=evaluated,
            outcome_prediction=OutcomePrediction(
                p_yes=p_yes,
                confidence=1.0,
                info_utility=None,
            ),
        )
Loading
Loading