
Commit d99c1c4

Merge pull request SWE-bench#178 from princeton-nlp/add-schema-version
Add schema version to report card
2 parents: 7a5729d + 44482e2

File tree

2 files changed: +30 -1 lines changed

swebench/harness/run_evaluation.py
tests/test_evaluation.py


swebench/harness/run_evaluation.py

Lines changed: 6 additions & 1 deletion
@@ -357,7 +357,7 @@ def make_run_report(
         full_dataset: list,
         client: docker.DockerClient,
         run_id: str
-    ):
+    ) -> Path:
     """
     Make a final evaluation and run report of the instances that have been run.
     Also reports on images and containers that may still running!
@@ -367,6 +367,9 @@ def make_run_report(
         full_dataset (list): List of all instances
         client (docker.DockerClient): Docker client
         run_id (str): Run ID
+
+    Returns:
+        Path to report file
     """
     # instantiate sets to store IDs of different outcomes
     completed_ids = set()
@@ -453,6 +456,7 @@ def make_run_report(
         "error_ids": list(sorted(error_ids)),
         "unstopped_containers": list(sorted(unstopped_containers)),
         "unremoved_images": list(sorted(unremoved_images)),
+        "schema_version": 2,
     }
     report_file = Path(
         list(predictions.values())[0]["model_name_or_path"].replace("/", "__")
@@ -462,6 +466,7 @@ def make_run_report(
     with open(report_file, "w") as f:
         print(json.dumps(report, indent=4), file=f)
     print(f"Report written to {report_file}")
+    return report_file
 
 
 def get_gold_predictions(dataset_name: str, split: str):
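
For context, a minimal sketch of how downstream code might consume the Path now returned by make_run_report together with the new schema_version field. The helper name load_run_report and the fallback to version 1 for reports written before this change are illustrative assumptions, not part of this commit.

import json
from pathlib import Path

EXPECTED_SCHEMA_VERSION = 2  # the value written by make_run_report in this commit

def load_run_report(report_file: Path) -> dict:
    # Hypothetical consumer-side check: read the report produced by
    # make_run_report and verify its schema version before using it.
    report = json.loads(report_file.read_text())
    # Reports written before this commit carry no "schema_version" key,
    # so a missing field is treated as version 1 (an assumption, not in the source).
    version = report.get("schema_version", 1)
    if version != EXPECTED_SCHEMA_VERSION:
        raise ValueError(f"Unsupported run report schema version: {version}")
    return report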

tests/test_evaluation.py

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+import collections
+import json
+import docker
+
+from swebench.harness.run_evaluation import make_run_report
+
+TEST_INSTANCE = collections.defaultdict(lambda: "test")
+TEST_INSTANCE["PASS_TO_PASS"] = '[]'
+TEST_INSTANCE["repo"] = 'pvlib/pvlib-python'
+TEST_INSTANCE["version"] = '0.1'
+TEST_INSTANCE["FAIL_TO_PASS"] = '[]'
+
+def test_make_run_report(tmpdir) -> None:
+    client = docker.from_env()
+    with tmpdir.as_cwd():
+        output_path = make_run_report(
+            {"test": {"instance_id": "test", "model_name_or_path": "test"}},
+            [TEST_INSTANCE],
+            client,
+            "test"
+        )
+        assert output_path.is_file()
+        report = json.loads(output_path.read_text())
+        assert report["schema_version"] == 2
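
A note on the fixture style used above: collections.defaultdict(lambda: "test") hands the harness an instance in which any field the code looks up but that is not set explicitly falls back to the placeholder string "test". A small sketch of that behavior (variable names here are illustrative only):

import collections

instance = collections.defaultdict(lambda: "test")
instance["repo"] = "pvlib/pvlib-python"

assert instance["repo"] == "pvlib/pvlib-python"  # explicitly set field
assert instance["instance_id"] == "test"         # any unset key falls back to the placeholder

The test can then be run with pytest from the repository root, provided a local Docker daemon is available for docker.from_env().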
