From cf792a56fe94cbaafaba460abf9280874313fc8a Mon Sep 17 00:00:00 2001 From: ibraheem-latent Date: Mon, 2 Feb 2026 16:04:25 -0800 Subject: [PATCH 1/6] add validate_patched_files_syntax --- utils/diff.py | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/utils/diff.py b/utils/diff.py index 216d92175..67a20964f 100644 --- a/utils/diff.py +++ b/utils/diff.py @@ -1,5 +1,6 @@ """Utilities for computing diffs between files.""" +import ast import os import tempfile import subprocess @@ -121,4 +122,56 @@ def apply_diff_to_local_repo(diff, local_repo_dir) -> None: # Check if the diff was applied successfully if result.returncode != 0: - logger.fatal(f"Failed to apply diff to {local_repo_dir}: {result.stderr.strip()}") \ No newline at end of file + logger.fatal(f"Failed to apply diff to {local_repo_dir}: {result.stderr.strip()}") + + +def validate_patched_files_syntax(repo_dir: str) -> Tuple[bool, Optional[str]]: + """ + After a patch has been applied, check that modified files have valid syntax. + Supports Python (.py) and JavaScript (.js, .mjs) files. + + Args: + repo_dir: The repository directory where the patch was applied + + Returns: + (is_valid: bool, error_message: Optional[str]) + """ + result = subprocess.run( + ["git", "diff", "--name-only"], + cwd=repo_dir, + capture_output=True, + text=True + ) + if result.returncode != 0: + return False, f"Failed to get modified files: {result.stderr.strip()}" + + modified_files = [f.strip() for f in result.stdout.strip().splitlines() if f.strip()] + + errors = [] + for filepath in modified_files: + full_path = os.path.join(repo_dir, filepath) + if not os.path.exists(full_path): + continue + + if filepath.endswith(".py"): + try: + with open(full_path, "r") as f: + source = f.read() + ast.parse(source, filename=filepath) + except SyntaxError as e: + errors.append(f"{filepath}:{e.lineno}: {e.msg}") + + elif filepath.endswith((".js", ".mjs")): + with open(full_path, "r") as f: + source = f.read() + result = subprocess.run( + ["node", "--input-type=module", "--check"], + input=source, + capture_output=True, text=True + ) + if result.returncode != 0: + errors.append(f"{filepath}: {result.stderr.strip()}") + + if errors: + return False, "Patched files have syntax errors:\n" + "\n".join(errors) + return True, None From 7de431723241c25b0850290995d0e186912db55a Mon Sep 17 00:00:00 2001 From: ibraheem-latent Date: Mon, 2 Feb 2026 16:04:52 -0800 Subject: [PATCH 2/6] ruff formatting --- utils/diff.py | 56 +++++++++++++++------------------------------------ 1 file changed, 16 insertions(+), 40 deletions(-) diff --git a/utils/diff.py b/utils/diff.py index 67a20964f..aee6edcc7 100644 --- a/utils/diff.py +++ b/utils/diff.py @@ -9,15 +9,14 @@ from typing import Tuple, Optional - def get_file_diff(old_path, new_path) -> str: """ Gets the diff between two files. - + Args: old_path: The path to the old file new_path: The path to the new file - + Returns: The diff between the two files, expressed as a diff of the old file, as a string. """ @@ -29,13 +28,9 @@ def get_file_diff(old_path, new_path) -> str: missing.append(new_path) if missing: logger.fatal(f"File(s) not found for diff: {', '.join(missing)}") - + # Use diff command - result = subprocess.run( - ["diff", "-u", old_path, new_path], - capture_output=True, - text=True - ) + result = subprocess.run(["diff", "-u", old_path, new_path], capture_output=True, text=True) # Check if the diff was generated successfully # `diff -u` return codes: @@ -54,39 +49,33 @@ def get_file_diff(old_path, new_path) -> str: filename = os.path.basename(old_path) lines[0] = f"--- {filename}" lines[1] = f"+++ {filename}" - - return "\n".join(lines) + return "\n".join(lines) def validate_diff_for_local_repo(diff, local_repo_dir) -> Tuple[bool, Optional[str]]: """ Validates if a diff string is valid and can be applied to a local repository. - + Args: diff: The diff string to validate local_repo_dir: The local repository directory - + Returns: (is_valid: bool, error_message: Optional[str]) """ - + # Write diff to temp file with tempfile.NamedTemporaryFile(mode="w", suffix=".diff", delete=False) as f: f.write(diff) diff_file = f.name - + # Use `git apply --check` to validate without applying - result = subprocess.run( - ["git", "apply", "--check", diff_file], - cwd=local_repo_dir, - capture_output=True, - text=True - ) + result = subprocess.run(["git", "apply", "--check", diff_file], cwd=local_repo_dir, capture_output=True, text=True) # Delete the temp file os.unlink(diff_file) - + # Check if the diff was applied successfully if result.returncode == 0: return True, None @@ -94,11 +83,10 @@ def validate_diff_for_local_repo(diff, local_repo_dir) -> Tuple[bool, Optional[s return False, result.stderr.strip() - def apply_diff_to_local_repo(diff, local_repo_dir) -> None: """ Applies a diff string to files in the source directory. - + Args: diff: The diff string to apply local_repo_dir: The local repository directory @@ -108,14 +96,9 @@ def apply_diff_to_local_repo(diff, local_repo_dir) -> None: with tempfile.NamedTemporaryFile(mode="w", suffix=".diff", delete=False) as f: f.write(diff) diff_file = f.name - + # Use `git apply` to apply the diff - result = subprocess.run( - ["git", "apply", diff_file], - cwd=local_repo_dir, - capture_output=True, - text=True - ) + result = subprocess.run(["git", "apply", diff_file], cwd=local_repo_dir, capture_output=True, text=True) # Delete the temp file os.unlink(diff_file) @@ -136,12 +119,7 @@ def validate_patched_files_syntax(repo_dir: str) -> Tuple[bool, Optional[str]]: Returns: (is_valid: bool, error_message: Optional[str]) """ - result = subprocess.run( - ["git", "diff", "--name-only"], - cwd=repo_dir, - capture_output=True, - text=True - ) + result = subprocess.run(["git", "diff", "--name-only"], cwd=repo_dir, capture_output=True, text=True) if result.returncode != 0: return False, f"Failed to get modified files: {result.stderr.strip()}" @@ -165,9 +143,7 @@ def validate_patched_files_syntax(repo_dir: str) -> Tuple[bool, Optional[str]]: with open(full_path, "r") as f: source = f.read() result = subprocess.run( - ["node", "--input-type=module", "--check"], - input=source, - capture_output=True, text=True + ["node", "--input-type=module", "--check"], input=source, capture_output=True, text=True ) if result.returncode != 0: errors.append(f"{filepath}: {result.stderr.strip()}") From a087fbd11aedd66a6f7ab20a7a26f1d9a1c7e054 Mon Sep 17 00:00:00 2001 From: ibraheem-latent Date: Mon, 2 Feb 2026 16:05:20 -0800 Subject: [PATCH 3/6] syntax check in polyglot suite --- evaluator/problem_suites/polyglot/polyglot_suite.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/evaluator/problem_suites/polyglot/polyglot_suite.py b/evaluator/problem_suites/polyglot/polyglot_suite.py index bd9db98ac..1c8fa9d85 100644 --- a/evaluator/problem_suites/polyglot/polyglot_suite.py +++ b/evaluator/problem_suites/polyglot/polyglot_suite.py @@ -18,7 +18,7 @@ from utils.git import init_local_repo_with_initial_commit from evaluator.sandbox.sandbox_manager import SandboxManager from evaluator.problem_suites.problem_suite import ProblemSuite, ProblemSuiteName -from utils.diff import get_file_diff, apply_diff_to_local_repo, validate_diff_for_local_repo +from utils.diff import get_file_diff, apply_diff_to_local_repo, validate_diff_for_local_repo, validate_patched_files_syntax @@ -147,7 +147,13 @@ def _on_mount(temp_dir: str): # Apply the patch apply_diff_to_local_repo(patch, sandbox_repo_dir) - + # Syntax-check the patched files + is_valid, error_message = validate_patched_files_syntax(sandbox_repo_dir) + if not is_valid: + raise EvaluationRunException( + EvaluationRunErrorCode.AGENT_INVALID_PATCH, + f"{EvaluationRunErrorCode.AGENT_INVALID_PATCH.get_error_message()}: {error_message}" + ) return sandbox_manager.initialize_sandbox( name=f"eval-sandbox-{problem.name}-{evaluation_run_id}", From 9e53eb6708c60b94a1443b826ee4cec222f1fb02 Mon Sep 17 00:00:00 2001 From: ibraheem-latent Date: Mon, 2 Feb 2026 16:08:15 -0800 Subject: [PATCH 4/6] syntax check in swebench --- .../swebench_verified/swebench_verified_suite.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/evaluator/problem_suites/swebench_verified/swebench_verified_suite.py b/evaluator/problem_suites/swebench_verified/swebench_verified_suite.py index 8d3e4742c..900a210c5 100644 --- a/evaluator/problem_suites/swebench_verified/swebench_verified_suite.py +++ b/evaluator/problem_suites/swebench_verified/swebench_verified_suite.py @@ -11,7 +11,7 @@ from pydantic import BaseModel from utils.docker import get_docker_client from typing import Any, Dict, List, Tuple, Optional -from utils.diff import validate_diff_for_local_repo +from utils.diff import validate_diff_for_local_repo, apply_diff_to_local_repo, validate_patched_files_syntax from evaluator.models import EvaluationRunException from swebench.harness.constants import SWEbenchInstance from utils.temp import create_temp_dir, delete_temp_dir @@ -184,7 +184,14 @@ def initialize_eval_sandbox( f"{EvaluationRunErrorCode.AGENT_INVALID_PATCH.get_error_message()}: {error_message}" ) - + # Syntax-check the patched files + apply_diff_to_local_repo(patch, temp_dir) + is_valid, error_message = validate_patched_files_syntax(temp_dir) + if not is_valid: + raise EvaluationRunException( + EvaluationRunErrorCode.AGENT_INVALID_PATCH, + f"{EvaluationRunErrorCode.AGENT_INVALID_PATCH.get_error_message()}: {error_message}" + ) swebench_instance = problem.userdata From 969a404bc6f78276c60cb48a72e561817e15615f Mon Sep 17 00:00:00 2001 From: ibraheem-latent Date: Mon, 2 Feb 2026 17:24:21 -0800 Subject: [PATCH 5/6] cleanup --- utils/diff.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/utils/diff.py b/utils/diff.py index aee6edcc7..a670d5c7f 100644 --- a/utils/diff.py +++ b/utils/diff.py @@ -120,9 +120,6 @@ def validate_patched_files_syntax(repo_dir: str) -> Tuple[bool, Optional[str]]: (is_valid: bool, error_message: Optional[str]) """ result = subprocess.run(["git", "diff", "--name-only"], cwd=repo_dir, capture_output=True, text=True) - if result.returncode != 0: - return False, f"Failed to get modified files: {result.stderr.strip()}" - modified_files = [f.strip() for f in result.stdout.strip().splitlines() if f.strip()] errors = [] @@ -140,8 +137,6 @@ def validate_patched_files_syntax(repo_dir: str) -> Tuple[bool, Optional[str]]: errors.append(f"{filepath}:{e.lineno}: {e.msg}") elif filepath.endswith((".js", ".mjs")): - with open(full_path, "r") as f: - source = f.read() result = subprocess.run( ["node", "--input-type=module", "--check"], input=source, capture_output=True, text=True ) From ce3a6027ba2a8cf1b758ca3993f030e218a30132 Mon Sep 17 00:00:00 2001 From: ibraheem-latent Date: Fri, 6 Feb 2026 14:48:58 -0800 Subject: [PATCH 6/6] add missing src --- utils/diff.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/diff.py b/utils/diff.py index a670d5c7f..8599437df 100644 --- a/utils/diff.py +++ b/utils/diff.py @@ -137,6 +137,8 @@ def validate_patched_files_syntax(repo_dir: str) -> Tuple[bool, Optional[str]]: errors.append(f"{filepath}:{e.lineno}: {e.msg}") elif filepath.endswith((".js", ".mjs")): + with open(full_path, "r") as f: + source = f.read() result = subprocess.run( ["node", "--input-type=module", "--check"], input=source, capture_output=True, text=True )