
Commit 5836826

Replace strings with constants
1 parent 55a5dd3 commit 5836826

8 files changed: +43 −36 lines changed

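For context, every constant referenced in the hunks below is imported from swebench/harness/constants.py, which this commit does not show. The following sketch reconstructs the definitions implied by the replaced string literals; the names match the diff, but the exact values and enum shapes are assumptions inferred from usage, not the module's verbatim contents.

# Sketch of swebench/harness/constants.py as implied by this commit.
# Values are inferred from the string literals being replaced below.
from enum import Enum

KEY_INSTANCE_ID = "instance_id"      # replaces the "instance_id" dict key
KEY_MODEL = "model_name_or_path"     # assumed, from the tests/test_evaluation.py hunk
FAIL_TO_PASS = "FAIL_TO_PASS"        # dataset/report field names
PASS_TO_PASS = "PASS_TO_PASS"

class TestStatus(Enum):              # moved here from log_parsers.py (see below)
    FAILED = "FAILED"
    PASSED = "PASSED"
    SKIPPED = "SKIPPED"
    ERROR = "ERROR"

class ResolvedStatus(Enum):          # grading.py compares against FULL.value
    FULL = "RESOLVED_FULL"           # other members are assumed to exist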

swebench/harness/dockerfiles.py

Lines changed: 0 additions & 2 deletions
@@ -1,5 +1,3 @@
-from functools import partial
-
 # IF you change the base image, you need to rebuild all images (run with --force_rebuild)
 _DOCKERFILE_BASE = r"""
 FROM --platform={platform} ubuntu:22.04

swebench/harness/grading.py

Lines changed: 6 additions & 5 deletions
@@ -6,6 +6,7 @@
     APPLY_PATCH_PASS,
     FAIL_TO_FAIL,
     FAIL_TO_PASS,
+    KEY_INSTANCE_ID,
     PASS_TO_FAIL,
     PASS_TO_PASS,
     RESET_FAILED,
@@ -225,7 +226,7 @@ def get_eval_report(
     """
     report_map = {}

-    instance_id = prediction["instance_id"]
+    instance_id = prediction[KEY_INSTANCE_ID]
     if instance_id not in report_map:
         report_map[instance_id] = {
             "patch_is_None": False,
@@ -248,13 +249,13 @@ def get_eval_report(
     report_map[instance_id]["patch_successfully_applied"] = True

     eval_ref = {
-        "instance_id": test_spec.instance_id,
-        "FAIL_TO_PASS": test_spec.FAIL_TO_PASS,
-        "PASS_TO_PASS": test_spec.PASS_TO_PASS,
+        KEY_INSTANCE_ID: test_spec.instance_id,
+        FAIL_TO_PASS: test_spec.FAIL_TO_PASS,
+        PASS_TO_PASS: test_spec.PASS_TO_PASS,
     }

     report = get_eval_tests_report(eval_sm, eval_ref)
-    if get_resolution_status(report) == "RESOLVED_FULL":
+    if get_resolution_status(report) == ResolvedStatus.FULL.value:
         report_map[instance_id]["resolved"] = True

     if include_tests_status:
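One detail worth noting in the last hunk: get_resolution_status evidently returns a plain string, so the comparison uses ResolvedStatus.FULL.value rather than the enum member itself. A minimal self-contained sketch, assuming the enum shape from the constants sketch above:

from enum import Enum

class ResolvedStatus(Enum):                 # assumed shape, per the sketch above
    FULL = "RESOLVED_FULL"

status = "RESOLVED_FULL"                    # e.g. what get_resolution_status returns
assert status == ResolvedStatus.FULL.value  # True: str == str
assert status != ResolvedStatus.FULL        # str vs Enum member: == is always False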

swebench/harness/log_parsers.py

Lines changed: 1 addition & 7 deletions
@@ -1,12 +1,6 @@
 import re
 from enum import Enum
-
-
-class TestStatus(Enum):
-    FAILED = "FAILED"
-    PASSED = "PASSED"
-    SKIPPED = "SKIPPED"
-    ERROR = "ERROR"
+from swebench.harness.constants import TestStatus


 def parse_log_pytest(log: str) -> dict[str, str]:
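Since TestStatus now lives in constants, any parser that maps test names to the enum's string values keeps working unchanged. A hedged sketch of the general pattern (illustrative only, not the actual parse_log_pytest body):

# Assumes pytest-style "PASSED test_x" lines; parse_log_sketch is hypothetical.
from swebench.harness.constants import TestStatus

def parse_log_sketch(log: str) -> dict[str, str]:
    status_map = {}
    for line in log.splitlines():
        for status in TestStatus:
            if line.startswith(status.value + " "):
                status_map[line.split(" ", 1)[1].strip()] = status.value
    return status_map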

swebench/harness/prepare_images.py

Lines changed: 3 additions & 2 deletions
@@ -3,6 +3,7 @@

 from argparse import ArgumentParser

+from swebench.harness.constants import KEY_INSTANCE_ID
 from swebench.harness.docker_build import build_instance_images
 from swebench.harness.docker_utils import list_images
 from swebench.harness.test_spec import make_test_spec
@@ -29,12 +30,12 @@ def filter_dataset_to_build(
     data_to_build = []

     # Check if all instance IDs are in the dataset
-    not_in_dataset = set(instance_ids).difference(set([instance["instance_id"] for instance in dataset]))
+    not_in_dataset = set(instance_ids).difference(set([instance[KEY_INSTANCE_ID] for instance in dataset]))
     if not_in_dataset:
         raise ValueError(f"Instance IDs not found in dataset: {not_in_dataset}")

     for instance in dataset:
-        if instance["instance_id"] not in instance_ids:
+        if instance[KEY_INSTANCE_ID] not in instance_ids:
            # Skip instances not in the list
            continue

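The surrounding filter_dataset_to_build logic validates requested IDs with a set difference before filtering, so a missing instance fails loudly rather than being silently skipped. A quick illustration with hypothetical IDs:

# Hypothetical IDs: any requested instance absent from the dataset raises
# immediately instead of being dropped without notice.
instance_ids = ["repo__proj-1", "repo__proj-9"]
dataset_ids = {"repo__proj-1", "repo__proj-2"}
not_in_dataset = set(instance_ids).difference(dataset_ids)
if not_in_dataset:
    raise ValueError(f"Instance IDs not found in dataset: {not_in_dataset}")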

swebench/harness/run_evaluation.py

Lines changed: 13 additions & 12 deletions
@@ -14,6 +14,7 @@
     APPLY_PATCH_FAIL,
     APPLY_PATCH_PASS,
     INSTANCE_IMAGE_BUILD_DIR,
+    KEY_INSTANCE_ID,
     RUN_EVALUATION_LOG_DIR,
 )
 from swebench.harness.docker_utils import (
@@ -302,7 +303,7 @@ def get_dataset_from_preds(
     """
     # load dataset
     dataset = load_swebench_dataset(dataset_name, split)
-    dataset_ids = {i["instance_id"] for i in dataset}
+    dataset_ids = {i[KEY_INSTANCE_ID] for i in dataset}

     if instance_ids:
         # check that all instance IDs are in the dataset
@@ -331,34 +332,34 @@ def get_dataset_from_preds(

     if instance_ids:
         # filter dataset to just the instance IDs
-        dataset = [i for i in dataset if i["instance_id"] in instance_ids]
+        dataset = [i for i in dataset if i[KEY_INSTANCE_ID] in instance_ids]

     # check which instance IDs have already been run
     completed_ids = set()
     for instance in dataset:
-        if instance["instance_id"] not in prediction_ids:
+        if instance[KEY_INSTANCE_ID] not in prediction_ids:
            # skip instances without predictions
            continue
-        prediction = predictions[instance["instance_id"]]
+        prediction = predictions[instance[KEY_INSTANCE_ID]]
         report_file = (
             RUN_EVALUATION_LOG_DIR
             / run_id
             / prediction["model_name_or_path"].replace("/", "__")
-            / prediction["instance_id"]
+            / prediction[KEY_INSTANCE_ID]
             / "report.json"
         )
         if report_file.exists():
-            completed_ids.add(instance["instance_id"])
+            completed_ids.add(instance[KEY_INSTANCE_ID])

     if completed_ids and exclude_completed:
         # filter dataset to only instances that have not been run
         print(f"{len(completed_ids)} instances already run, skipping...")
-        dataset = [i for i in dataset if i["instance_id"] not in completed_ids]
+        dataset = [i for i in dataset if i[KEY_INSTANCE_ID] not in completed_ids]

     empty_patch_ids = {k for k, v in predictions.items() if v["model_patch"] == "" or v["model_patch"] is None}

     # filter dataset to only instances with predictions
-    dataset = [i for i in dataset if i["instance_id"] in prediction_ids and i["instance_id"] not in empty_patch_ids]
+    dataset = [i for i in dataset if i[KEY_INSTANCE_ID] in prediction_ids and i[KEY_INSTANCE_ID] not in empty_patch_ids]
     return dataset


@@ -394,7 +395,7 @@ def make_run_report(

     # iterate through dataset and check if the instance has been run
     for instance in full_dataset:
-        instance_id = instance["instance_id"]
+        instance_id = instance[KEY_INSTANCE_ID]
         if instance_id not in predictions:
            # skip instances without
            incomplete_ids.add(instance_id)
@@ -407,7 +408,7 @@ def make_run_report(
             RUN_EVALUATION_LOG_DIR
             / run_id
             / prediction["model_name_or_path"].replace("/", "__")
-            / prediction["instance_id"]
+            / prediction[KEY_INSTANCE_ID]
             / "report.json"
         )
         if report_file.exists():
@@ -486,7 +487,7 @@ def get_gold_predictions(dataset_name: str, split: str):
     dataset = load_swebench_dataset(dataset_name, split)
     return [
         {
-            "instance_id": datum["instance_id"],
+            KEY_INSTANCE_ID: datum[KEY_INSTANCE_ID],
             "model_patch": datum["patch"],
             "model_name_or_path": "gold",
         } for datum in dataset
@@ -527,7 +528,7 @@ def main(
                predictions = [json.loads(line) for line in f]
        else:
            raise ValueError("Predictions path must be \"gold\", .json, or .jsonl")
-    predictions = {pred["instance_id"]: pred for pred in predictions}
+    predictions = {pred[KEY_INSTANCE_ID]: pred for pred in predictions}

     # get dataset from predictions
     dataset = get_dataset_from_preds(dataset_name, split, instance_ids, predictions, run_id)
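After the main() change, predictions are keyed by the same constant that every downstream lookup uses. A small sketch of the resulting shape, with a hypothetical instance ID:

# Hypothetical prediction as loaded from a .jsonl file, then re-keyed by
# instance ID exactly as main() now does.
from swebench.harness.constants import KEY_INSTANCE_ID

preds = [{KEY_INSTANCE_ID: "pvlib__pvlib-python-1", "model_patch": "...", "model_name_or_path": "gold"}]
predictions = {pred[KEY_INSTANCE_ID]: pred for pred in preds}
assert "pvlib__pvlib-python-1" in predictions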

swebench/harness/test_spec.py

Lines changed: 6 additions & 3 deletions
@@ -8,6 +8,9 @@

 from swebench.harness.constants import (
     SWEbenchInstance,
+    KEY_INSTANCE_ID,
+    FAIL_TO_PASS,
+    PASS_TO_PASS,
     MAP_REPO_TO_INSTALL,
     MAP_REPO_VERSION_TO_SPECS,
     USE_X86,
@@ -255,7 +258,7 @@ def make_eval_script_list(instance, specs, env_name, repo_directory, base_commit
 def make_test_spec(instance: SWEbenchInstance) -> TestSpec:
     if isinstance(instance, TestSpec):
         return instance
-    instance_id = instance["instance_id"]
+    instance_id = instance[KEY_INSTANCE_ID]
     repo = instance["repo"]
     version = instance["version"]
     base_commit = instance["base_commit"]
@@ -269,8 +272,8 @@ def _from_json_or_obj(key: str) -> Any:
            return json.loads(instance[key])
        return instance[key]

-    pass_to_pass = _from_json_or_obj("PASS_TO_PASS")
-    fail_to_pass = _from_json_or_obj("FAIL_TO_PASS")
+    pass_to_pass = _from_json_or_obj(PASS_TO_PASS)
+    fail_to_pass = _from_json_or_obj(FAIL_TO_PASS)

     env_name = "testbed"
     repo_directory = f"/{env_name}"
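The _from_json_or_obj helper exists because the FAIL_TO_PASS / PASS_TO_PASS fields arrive either as JSON strings (from the serialized dataset) or as already-parsed lists; the constants now name those keys consistently. A standalone sketch of the behavior (the real helper is a closure over instance, and the values are assumed per the constants sketch above):

import json
from typing import Any

FAIL_TO_PASS = "FAIL_TO_PASS"   # assumed values
PASS_TO_PASS = "PASS_TO_PASS"

def _from_json_or_obj(instance: dict, key: str) -> Any:
    # Fields may be serialized JSON strings or native Python lists.
    if isinstance(instance[key], str):
        return json.loads(instance[key])
    return instance[key]

assert _from_json_or_obj({FAIL_TO_PASS: '["test_a"]'}, FAIL_TO_PASS) == ["test_a"]
assert _from_json_or_obj({PASS_TO_PASS: ["test_b"]}, PASS_TO_PASS) == ["test_b"]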

swebench/harness/utils.py

Lines changed: 0 additions & 2 deletions
@@ -6,10 +6,8 @@

 from argparse import ArgumentTypeError
 from datasets import Dataset, load_dataset
-from datetime import datetime
 from dotenv import load_dotenv
 from functools import cache
-from git import Repo
 from typing import cast

 from swebench.harness.constants import (

tests/test_evaluation.py

Lines changed: 14 additions & 3 deletions
@@ -2,19 +2,30 @@
 import json
 import docker

+from swebench.harness.constants import (
+    FAIL_TO_PASS,
+    PASS_TO_PASS,
+    KEY_INSTANCE_ID,
+    KEY_MODEL,
+)
 from swebench.harness.run_evaluation import make_run_report

 TEST_INSTANCE = collections.defaultdict(lambda: "test")
-TEST_INSTANCE["PASS_TO_PASS"] = '[]'
+TEST_INSTANCE[PASS_TO_PASS] = '[]'
 TEST_INSTANCE["repo"] = 'pvlib/pvlib-python'
 TEST_INSTANCE["version"] = '0.1'
-TEST_INSTANCE["FAIL_TO_PASS"] = '[]'
+TEST_INSTANCE[FAIL_TO_PASS] = '[]'

 def test_make_run_report(tmpdir) -> None:
     client = docker.from_env()
     with tmpdir.as_cwd():
         output_path = make_run_report(
-            {"test": {"instance_id": "test", "model_name_or_path": "test"}},
+            {
+                "test": {
+                    KEY_INSTANCE_ID: "test",
+                    KEY_MODEL: "test"
+                }
+            },
             [TEST_INSTANCE],
             client,
             "test"
