
Commit 5836826

Replace strings with constants
1 parent 55a5dd3 commit 5836826

8 files changed: +43 −36 lines changed

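For context, every constant referenced in the hunks below is imported from swebench/harness/constants.py, which this commit does not show. The following sketch reconstructs the definitions implied by the replaced string literals; the names match the diff, but the exact values and enum shapes are assumptions inferred from usage, not the module's verbatim contents.

# Sketch of swebench/harness/constants.py as implied by this commit.
# Values are inferred from the string literals being replaced below.
from enum import Enum

KEY_INSTANCE_ID = "instance_id"      # replaces the "instance_id" dict key
KEY_MODEL = "model_name_or_path"     # assumed, from the tests/test_evaluation.py hunk
FAIL_TO_PASS = "FAIL_TO_PASS"        # dataset/report field names
PASS_TO_PASS = "PASS_TO_PASS"

class TestStatus(Enum):              # moved here from log_parsers.py (see below)
    FAILED = "FAILED"
    PASSED = "PASSED"
    SKIPPED = "SKIPPED"
    ERROR = "ERROR"

class ResolvedStatus(Enum):          # grading.py compares against FULL.value
    FULL = "RESOLVED_FULL"           # other members are assumed to exist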

swebench/harness/dockerfiles.py

Lines changed: 0 additions & 2 deletions
@@ -1,5 +1,3 @@
-from functools import partial
-
 # IF you change the base image, you need to rebuild all images (run with --force_rebuild)
 _DOCKERFILE_BASE = r"""
 FROM --platform={platform} ubuntu:22.04

swebench/harness/grading.py

Lines changed: 6 additions & 5 deletions
@@ -6,6 +6,7 @@
     APPLY_PATCH_PASS,
     FAIL_TO_FAIL,
     FAIL_TO_PASS,
+    KEY_INSTANCE_ID,
     PASS_TO_FAIL,
     PASS_TO_PASS,
     RESET_FAILED,
@@ -225,7 +226,7 @@ def get_eval_report(
     """
     report_map = {}

-    instance_id = prediction["instance_id"]
+    instance_id = prediction[KEY_INSTANCE_ID]
     if instance_id not in report_map:
         report_map[instance_id] = {
             "patch_is_None": False,
@@ -248,13 +249,13 @@ def get_eval_report(
     report_map[instance_id]["patch_successfully_applied"] = True

     eval_ref = {
-        "instance_id": test_spec.instance_id,
-        "FAIL_TO_PASS": test_spec.FAIL_TO_PASS,
-        "PASS_TO_PASS": test_spec.PASS_TO_PASS,
+        KEY_INSTANCE_ID: test_spec.instance_id,
+        FAIL_TO_PASS: test_spec.FAIL_TO_PASS,
+        PASS_TO_PASS: test_spec.PASS_TO_PASS,
     }

     report = get_eval_tests_report(eval_sm, eval_ref)
-    if get_resolution_status(report) == "RESOLVED_FULL":
+    if get_resolution_status(report) == ResolvedStatus.FULL.value:
         report_map[instance_id]["resolved"] = True

     if include_tests_status:
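One detail worth noting in the last hunk: get_resolution_status evidently returns a plain string, so the comparison uses ResolvedStatus.FULL.value rather than the enum member itself. A minimal self-contained sketch, assuming the enum shape from the constants sketch above:

from enum import Enum

class ResolvedStatus(Enum):                 # assumed shape, per the sketch above
    FULL = "RESOLVED_FULL"

status = "RESOLVED_FULL"                    # e.g. what get_resolution_status returns
assert status == ResolvedStatus.FULL.value  # True: str == str
assert status != ResolvedStatus.FULL        # str vs Enum member: == is always False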

swebench/harness/log_parsers.py

Lines changed: 1 addition & 7 deletions
@@ -1,12 +1,6 @@
 import re
 from enum import Enum
-
-
-class TestStatus(Enum):
-    FAILED = "FAILED"
-    PASSED = "PASSED"
-    SKIPPED = "SKIPPED"
-    ERROR = "ERROR"
+from swebench.harness.constants import TestStatus


 def parse_log_pytest(log: str) -> dict[str, str]:
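Since TestStatus now lives in constants, any parser that maps test names to the enum's string values keeps working unchanged. A hedged sketch of the general pattern (illustrative only, not the actual parse_log_pytest body):

# Assumes pytest-style "PASSED test_x" lines; parse_log_sketch is hypothetical.
from swebench.harness.constants import TestStatus

def parse_log_sketch(log: str) -> dict[str, str]:
    status_map = {}
    for line in log.splitlines():
        for status in TestStatus:
            if line.startswith(status.value + " "):
                status_map[line.split(" ", 1)[1].strip()] = status.value
    return status_map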

swebench/harness/prepare_images.py

Lines changed: 3 additions & 2 deletions
@@ -3,6 +3,7 @@

 from argparse import ArgumentParser

+from swebench.harness.constants import KEY_INSTANCE_ID
 from swebench.harness.docker_build import build_instance_images
 from swebench.harness.docker_utils import list_images
 from swebench.harness.test_spec import make_test_spec
@@ -29,12 +30,12 @@ def filter_dataset_to_build(
     data_to_build = []

     # Check if all instance IDs are in the dataset
-    not_in_dataset = set(instance_ids).difference(set([instance["instance_id"] for instance in dataset]))
+    not_in_dataset = set(instance_ids).difference(set([instance[KEY_INSTANCE_ID] for instance in dataset]))
     if not_in_dataset:
         raise ValueError(f"Instance IDs not found in dataset: {not_in_dataset}")

     for instance in dataset:
-        if instance["instance_id"] not in instance_ids:
+        if instance[KEY_INSTANCE_ID] not in instance_ids:
            # Skip instances not in the list
            continue

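The surrounding filter_dataset_to_build logic validates requested IDs with a set difference before filtering, so a missing instance fails loudly rather than being silently skipped. A quick illustration with hypothetical IDs:

# Hypothetical IDs: any requested instance absent from the dataset raises
# immediately instead of being dropped without notice.
instance_ids = ["repo__proj-1", "repo__proj-9"]
dataset_ids = {"repo__proj-1", "repo__proj-2"}
not_in_dataset = set(instance_ids).difference(dataset_ids)
if not_in_dataset:
    raise ValueError(f"Instance IDs not found in dataset: {not_in_dataset}")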

swebench/harness/run_evaluation.py

Lines changed: 13 additions & 12 deletions
@@ -14,6 +14,7 @@
     APPLY_PATCH_FAIL,
     APPLY_PATCH_PASS,
     INSTANCE_IMAGE_BUILD_DIR,
+    KEY_INSTANCE_ID,
     RUN_EVALUATION_LOG_DIR,
 )
 from swebench.harness.docker_utils import (
@@ -302,7 +303,7 @@ def get_dataset_from_preds(
     """
     # load dataset
     dataset = load_swebench_dataset(dataset_name, split)
-    dataset_ids = {i["instance_id"] for i in dataset}
+    dataset_ids = {i[KEY_INSTANCE_ID] for i in dataset}

     if instance_ids:
         # check that all instance IDs are in the dataset
@@ -331,34 +332,34 @@ def get_dataset_from_preds(

     if instance_ids:
         # filter dataset to just the instance IDs
-        dataset = [i for i in dataset if i["instance_id"] in instance_ids]
+        dataset = [i for i in dataset if i[KEY_INSTANCE_ID] in instance_ids]

     # check which instance IDs have already been run
     completed_ids = set()
     for instance in dataset:
-        if instance["instance_id"] not in prediction_ids:
+        if instance[KEY_INSTANCE_ID] not in prediction_ids:
            # skip instances without predictions
            continue
-        prediction = predictions[instance["instance_id"]]
+        prediction = predictions[instance[KEY_INSTANCE_ID]]
         report_file = (
             RUN_EVALUATION_LOG_DIR
             / run_id
             / prediction["model_name_or_path"].replace("/", "__")
-            / prediction["instance_id"]
+            / prediction[KEY_INSTANCE_ID]
             / "report.json"
         )
         if report_file.exists():
-            completed_ids.add(instance["instance_id"])
+            completed_ids.add(instance[KEY_INSTANCE_ID])

     if completed_ids and exclude_completed:
         # filter dataset to only instances that have not been run
         print(f"{len(completed_ids)} instances already run, skipping...")
-        dataset = [i for i in dataset if i["instance_id"] not in completed_ids]
+        dataset = [i for i in dataset if i[KEY_INSTANCE_ID] not in completed_ids]

     empty_patch_ids = {k for k, v in predictions.items() if v["model_patch"] == "" or v["model_patch"] is None}

     # filter dataset to only instances with predictions
-    dataset = [i for i in dataset if i["instance_id"] in prediction_ids and i["instance_id"] not in empty_patch_ids]
+    dataset = [i for i in dataset if i[KEY_INSTANCE_ID] in prediction_ids and i[KEY_INSTANCE_ID] not in empty_patch_ids]
     return dataset


@@ -394,7 +395,7 @@ def make_run_report(

     # iterate through dataset and check if the instance has been run
     for instance in full_dataset:
-        instance_id = instance["instance_id"]
+        instance_id = instance[KEY_INSTANCE_ID]
         if instance_id not in predictions:
            # skip instances without
            incomplete_ids.add(instance_id)
@@ -407,7 +408,7 @@ def make_run_report(
             RUN_EVALUATION_LOG_DIR
             / run_id
             / prediction["model_name_or_path"].replace("/", "__")
-            / prediction["instance_id"]
+            / prediction[KEY_INSTANCE_ID]
             / "report.json"
         )
         if report_file.exists():
@@ -486,7 +487,7 @@ def get_gold_predictions(dataset_name: str, split: str):
     dataset = load_swebench_dataset(dataset_name, split)
     return [
         {
-            "instance_id": datum["instance_id"],
+            KEY_INSTANCE_ID: datum[KEY_INSTANCE_ID],
             "model_patch": datum["patch"],
             "model_name_or_path": "gold",
         } for datum in dataset
@@ -527,7 +528,7 @@ def main(
                predictions = [json.loads(line) for line in f]
        else:
            raise ValueError("Predictions path must be \"gold\", .json, or .jsonl")
-    predictions = {pred["instance_id"]: pred for pred in predictions}
+    predictions = {pred[KEY_INSTANCE_ID]: pred for pred in predictions}

     # get dataset from predictions
     dataset = get_dataset_from_preds(dataset_name, split, instance_ids, predictions, run_id)
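After the main() change, predictions are keyed by the same constant that every downstream lookup uses. A small sketch of the resulting shape, with a hypothetical instance ID:

# Hypothetical prediction as loaded from a .jsonl file, then re-keyed by
# instance ID exactly as main() now does.
from swebench.harness.constants import KEY_INSTANCE_ID

preds = [{KEY_INSTANCE_ID: "pvlib__pvlib-python-1", "model_patch": "...", "model_name_or_path": "gold"}]
predictions = {pred[KEY_INSTANCE_ID]: pred for pred in preds}
assert "pvlib__pvlib-python-1" in predictions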

swebench/harness/test_spec.py

Lines changed: 6 additions & 3 deletions
@@ -8,6 +8,9 @@

 from swebench.harness.constants import (
     SWEbenchInstance,
+    KEY_INSTANCE_ID,
+    FAIL_TO_PASS,
+    PASS_TO_PASS,
     MAP_REPO_TO_INSTALL,
     MAP_REPO_VERSION_TO_SPECS,
     USE_X86,
@@ -255,7 +258,7 @@ def make_eval_script_list(instance, specs, env_name, repo_directory, base_commit
 def make_test_spec(instance: SWEbenchInstance) -> TestSpec:
     if isinstance(instance, TestSpec):
         return instance
-    instance_id = instance["instance_id"]
+    instance_id = instance[KEY_INSTANCE_ID]
     repo = instance["repo"]
     version = instance["version"]
     base_commit = instance["base_commit"]
@@ -269,8 +272,8 @@ def _from_json_or_obj(key: str) -> Any:
            return json.loads(instance[key])
        return instance[key]

-    pass_to_pass = _from_json_or_obj("PASS_TO_PASS")
-    fail_to_pass = _from_json_or_obj("FAIL_TO_PASS")
+    pass_to_pass = _from_json_or_obj(PASS_TO_PASS)
+    fail_to_pass = _from_json_or_obj(FAIL_TO_PASS)

     env_name = "testbed"
     repo_directory = f"/{env_name}"
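The _from_json_or_obj helper exists because the FAIL_TO_PASS / PASS_TO_PASS fields arrive either as JSON strings (from the serialized dataset) or as already-parsed lists; the constants now name those keys consistently. A standalone sketch of the behavior (the real helper is a closure over instance, and the values are assumed per the constants sketch above):

import json
from typing import Any

FAIL_TO_PASS = "FAIL_TO_PASS"   # assumed values
PASS_TO_PASS = "PASS_TO_PASS"

def _from_json_or_obj(instance: dict, key: str) -> Any:
    # Fields may be serialized JSON strings or native Python lists.
    if isinstance(instance[key], str):
        return json.loads(instance[key])
    return instance[key]

assert _from_json_or_obj({FAIL_TO_PASS: '["test_a"]'}, FAIL_TO_PASS) == ["test_a"]
assert _from_json_or_obj({PASS_TO_PASS: ["test_b"]}, PASS_TO_PASS) == ["test_b"]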

swebench/harness/utils.py

Lines changed: 0 additions & 2 deletions
@@ -6,10 +6,8 @@

 from argparse import ArgumentTypeError
 from datasets import Dataset, load_dataset
-from datetime import datetime
 from dotenv import load_dotenv
 from functools import cache
-from git import Repo
 from typing import cast

 from swebench.harness.constants import (

tests/test_evaluation.py

Lines changed: 14 additions & 3 deletions
@@ -2,19 +2,30 @@
 import json
 import docker

+from swebench.harness.constants import (
+    FAIL_TO_PASS,
+    PASS_TO_PASS,
+    KEY_INSTANCE_ID,
+    KEY_MODEL,
+)
 from swebench.harness.run_evaluation import make_run_report

 TEST_INSTANCE = collections.defaultdict(lambda: "test")
-TEST_INSTANCE["PASS_TO_PASS"] = '[]'
+TEST_INSTANCE[PASS_TO_PASS] = '[]'
 TEST_INSTANCE["repo"] = 'pvlib/pvlib-python'
 TEST_INSTANCE["version"] = '0.1'
-TEST_INSTANCE["FAIL_TO_PASS"] = '[]'
+TEST_INSTANCE[FAIL_TO_PASS] = '[]'

 def test_make_run_report(tmpdir) -> None:
     client = docker.from_env()
     with tmpdir.as_cwd():
         output_path = make_run_report(
-            {"test": {"instance_id": "test", "model_name_or_path": "test"}},
+            {
+                "test": {
+                    KEY_INSTANCE_ID: "test",
+                    KEY_MODEL: "test"
+                }
+            },
             [TEST_INSTANCE],
             client,
             "test"
