forked from balrog-ai/BALROG
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval.py
66 lines (53 loc) · 1.98 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import logging
import os
import sys
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path
import hydra
from hydra.utils import get_original_cwd
from omegaconf import DictConfig
from balrog.agents import AgentFactory
from balrog.evaluator import EvaluatorManager
from balrog.utils import collect_and_summarize_results, print_summary_table, setup_environment
@contextmanager
def redirect_to_file(filepath, *, mode="w", encoding="utf-8"):
    """Temporarily point ``sys.stdout`` at a file.

    While the ``with`` body runs, everything written to ``sys.stdout``
    (e.g. via ``print``) goes to ``filepath``. On exit -- normal or via an
    exception -- the previous ``sys.stdout`` object is put back and the
    file is closed.

    Args:
        filepath: Path of the file that receives the output.
        mode: Mode passed to ``open``. The default ``"w"`` truncates an
            existing file; pass ``"a"`` to keep what is already there.
        encoding: Text encoding of the file. Fixed to UTF-8 by default so
            that non-ASCII output does not depend on the platform locale.

    Yields:
        None. The redirection itself is the only effect.
    """
    original = sys.stdout
    with open(filepath, mode, encoding=encoding) as file:
        sys.stdout = file
        try:
            yield
        finally:
            # Restore before the file is closed so nothing can write to a
            # closed stream through sys.stdout.
            sys.stdout = original
@hydra.main(config_path="balrog/config", config_name="config", version_base="1.1")
def main(config: DictConfig):
    """Run an evaluation and print a summary of its results.

    Steps, in order:
      1. Call ``setup_environment`` with Hydra's original working directory.
      2. Pick the output directory: ``config.eval.resume_from`` when it is
         set, otherwise a new timestamped directory under
         ``config.eval.output_dir`` named after the agent type and model id.
      3. Route both ``logging`` records and ``sys.stdout`` into
         ``<output_dir>/eval.log`` while the evaluation runs.
      4. Collect the results from the output directory and print the
         summary table to the real stdout.

    Args:
        config: Hydra/OmegaConf configuration. This function reads
            ``eval.resume_from``, ``eval.output_dir``, ``agent.type`` and
            ``client.model_id``; the whole object is also handed to
            ``EvaluatorManager`` and ``AgentFactory``.
    """
    # Function-scope import: only the stdout redirection below needs it.
    from contextlib import redirect_stdout

    original_cwd = get_original_cwd()
    setup_environment(original_cwd=original_cwd)

    # Determine output directory
    if config.eval.resume_from is not None:
        output_dir = config.eval.resume_from
    else:
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        # Model ids may contain "/", which must not create nested directories.
        run_name = f"{timestamp}_{config.agent.type}_{config.client.model_id.replace('/', '_')}"
        output_dir = os.path.join(config.eval.output_dir, run_name)

    # Create the directory if it doesn't exist (no-op when it already does)
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Setup logger
    log_filename = os.path.join(output_dir, "eval.log")
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[logging.FileHandler(log_filename, encoding="utf-8")],
        force=True,
    )

    # Create an EvaluatorManager and run evaluation
    evaluator_manager = EvaluatorManager(config, original_cwd=original_cwd, output_dir=output_dir)
    agent_factory = AgentFactory(config)

    # The logging handler above already holds eval.log open for appending.
    # Opening the same file with "w" would truncate it (losing the previous
    # log when resuming) and give stdout its own write offset starting at 0,
    # so flushed stdout data would overwrite log records. Appending keeps
    # both writers at end-of-file; line buffering keeps printed lines close
    # to their chronological position among the log records.
    with open(log_filename, "a", buffering=1, encoding="utf-8") as log_file, redirect_stdout(log_file):
        evaluator_manager.run(agent_factory)

    # Collect and summarize results
    summary = collect_and_summarize_results(output_dir)
    print_summary_table(summary)
# Script entry point: run the evaluation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()